diff options
author | Eric Wong <e@80x24.org> | 2023-01-08 05:03:26 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2023-01-08 05:05:01 +0000 |
commit | b5ab9be6686aa778a4cfd7622c598736b9c42321 (patch) | |
tree | a967297606a1f91f8359e287d5a85ad018e185e3 /ext/mwrap/mwrap.c | |
parent | 4356beb8237a92b3902b17f55cfe93d347b593d5 (diff) | |
parent | 2c25edb01139365f4754985c1e3494765dd1e5a7 (diff) | |
download | mwrap-b5ab9be6686aa778a4cfd7622c598736b9c42321.tar.gz |
This contains many changes from https://80x24.org/mwrap-perl.git commit * Built-in RCU-friendly version of dlmalloc, no more fragile dlsym(3m) resolution of malloc-family functions in the constructor * Allocations are now backed by O_TMPFILE on $TMPDIR on modern Linux, since mwrap increases memory usage greatly and I needed to use it on a system where I needed more VM space but lacked the ability to add swap. * Configurable C backtrace level via MWRAP=bt:$DEPTH where $DEPTH is a non-negative integer. Be careful about increasing it, even a depth of 3-4 can be orders-of-magnitude more expensive in time and space. This can be changed dynamically at runtime via local HTTP (see below). * Embedded per-process local-socket-only HTTP server obsoletes MwrapRack when combined with mwrap-rproxy from the Perl dist (set `MWRAP=socket_dir:/dir/of/sockets') See https://80x24.org/mwrap-perl/20221210015518.272576-4-e@80x24.org/ for more info. It now supports downloading CSV (suitable for importing into sqlite 3.32.0+) * License switched to GPL-3+ to be compatible with GNU binutils since we may take code from addr2line in the future. * libxxhash supported if XXH3_64bits is available.
Diffstat (limited to 'ext/mwrap/mwrap.c')
-rw-r--r-- | ext/mwrap/mwrap.c | 396 |
1 files changed, 396 insertions, 0 deletions
diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c new file mode 100644 index 0000000..d88fee6 --- /dev/null +++ b/ext/mwrap/mwrap.c @@ -0,0 +1,396 @@ +/* + * Copyright (C) mwrap hackers <mwrap-public@80x24.org> + * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt> + */ +#define MWRAP_RUBY 1 +#include "mwrap_core.h" + +static ID id_uminus; +extern VALUE __attribute__((weak)) rb_cObject; +extern VALUE __attribute__((weak)) rb_eTypeError; +extern VALUE __attribute__((weak)) rb_yield(VALUE); + +/* + * call-seq: + * + * Mwrap.dump([[io] [, min]] -> nil + * + * Dumps the current totals to +io+ which must be an IO object + * (StringIO and similar are not supported). Total sizes smaller + * than or equal to +min+ are skipped. + * + * The output is space-delimited by 3 columns: + * + * total_size call_count location + */ +static VALUE mwrap_dump(int argc, VALUE *argv, VALUE mod) +{ + VALUE io, min; + struct dump_arg a; + rb_io_t *fptr; + + rb_scan_args(argc, argv, "02", &io, &min); + + if (NIL_P(io)) + /* library may be linked w/o Ruby */ + io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr")); + + a.min = NIL_P(min) ? 0 : NUM2SIZET(min); + io = rb_io_get_io(io); + io = rb_io_get_write_io(io); + GetOpenFile(io, fptr); + a.fp = rb_io_stdio_file(fptr); + + rb_thread_call_without_gvl((void *(*)(void *))dump_to_file, &a, 0, 0); + RB_GC_GUARD(io); + return Qnil; +} + +/* The whole operation is not remotely atomic... */ +static void *totals_reset(void *ign) +{ + mwrap_reset(); + return NULL; +} + +/* + * call-seq: + * + * Mwrap.reset -> nil + * + * Resets the the total tables by zero-ing all counters. + * This resets all statistics. This is not an atomic operation + * as other threads (outside of GVL) may increment counters. 
+ */ +static VALUE reset_m(VALUE mod) +{ + rb_thread_call_without_gvl(totals_reset, 0, 0, 0); + return Qnil; +} + +static VALUE rcu_unlock_ensure(VALUE ignored) +{ + rcu_read_unlock(); + --locating; + return Qfalse; +} + +static VALUE location_string(const struct src_loc *l) +{ + VALUE tmp = rb_str_new(NULL, 0); + + if (l->f) { + rb_str_cat(tmp, l->f->fn, l->f->fn_len); + if (l->lineno == U24_MAX) + rb_str_cat_cstr(tmp, ":-"); + else + rb_str_catf(tmp, ":%u", l->lineno); + } + if (l->bt_len) { + AUTO_FREE char **s = bt_syms(l->bt, l->bt_len); + + if (s) { + if (l->f) + rb_str_cat_cstr(tmp, "\n"); + rb_str_cat_cstr(tmp, s[0]); + for (uint32_t i = 1; i < l->bt_len; ++i) + rb_str_catf(tmp, "\n%s", s[i]); + } + } + + /* deduplicate and try to free up some memory */ + VALUE ret = rb_funcall(tmp, id_uminus, 0); + if (!OBJ_FROZEN_RAW(tmp)) + rb_str_resize(tmp, 0); + + return ret; +} + +static VALUE dump_each_rcu(VALUE x) +{ + struct dump_arg *a = (struct dump_arg *)x; + struct cds_lfht *t; + struct cds_lfht_iter iter; + struct src_loc *l; + + t = CMM_LOAD_SHARED(totals); + cds_lfht_for_each_entry(t, &iter, l, hnode) { + VALUE v[6]; + if (l->total <= a->min) continue; + + v[0] = location_string(l); + v[1] = SIZET2NUM(l->total); + v[2] = SIZET2NUM(l->allocations); + v[3] = SIZET2NUM(l->frees); + v[4] = SIZET2NUM(l->age_total); + v[5] = SIZET2NUM(l->max_lifespan); + + rb_yield_values2(6, v); + assert(rcu_read_ongoing()); + } + return Qnil; +} + +/* + * call-seq: + * + * Mwrap.each([min]) do |location,total,allocations,frees,age_total,max_lifespan| + * ... + * end + * + * Yields each entry of the of the table to a caller-supplied block. + * +min+ may be specified to filter out lines with +total+ bytes + * equal-to-or-smaller-than the supplied minimum. + */ +static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod) +{ + VALUE min; + struct dump_arg a; + + rb_scan_args(argc, argv, "01", &min); + a.min = NIL_P(min) ? 
0 : NUM2SIZET(min); + + ++locating; + rcu_read_lock(); + + return rb_ensure(dump_each_rcu, (VALUE)&a, rcu_unlock_ensure, 0); +} + +static size_t +src_loc_memsize(const void *p) +{ + return sizeof(struct src_loc); +} + +static const rb_data_type_t src_loc_type = { + "source_location", + /* no marking, no freeing */ + { 0, 0, src_loc_memsize, /* reserved */ }, + /* parent, data, [ flags ] */ +}; + +static VALUE cSrcLoc; + +/* + * call-seq: + * Mwrap[location] -> Mwrap::SourceLocation + * + * Returns the associated Mwrap::SourceLocation given the +location+ + * String. +location+ is either a Ruby source location path:line + * (e.g. "/path/to/foo.rb:5") or a hexadecimal memory address with + * square-braces part yielded by Mwrap.dump (e.g. "[0xdeadbeef]") + */ +static VALUE mwrap_aref(VALUE mod, VALUE loc) +{ + const char *str = StringValueCStr(loc); + long len = RSTRING_LEN(loc); + assert(len >= 0); + struct src_loc *l = mwrap_get(str, (size_t)len); + + return l ? TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, l) : Qnil; +} + +static VALUE src_loc_each_i(VALUE p) +{ + struct alloc_hdr *h; + struct src_loc *l = (struct src_loc *)p; + + cds_list_for_each_entry_rcu(h, &l->allocs, anode) { + size_t gen = uatomic_read(&h->as.live.gen); + size_t size = uatomic_read(&h->size); + + if (size) { + VALUE v[2]; + v[0] = SIZET2NUM(size); + v[1] = SIZET2NUM(gen); + + rb_yield_values2(2, v); + } + } + + return Qfalse; +} + +static struct src_loc *src_loc_of(VALUE self) +{ + struct src_loc *l; + TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l); + assert(l); + return l; +} + +/* + * call-seq: + * loc = Mwrap[location] + * loc.each { |size,generation| ... } + * + * Iterates through live allocations for a given Mwrap::SourceLocation, + * yielding the +size+ (in bytes) and +generation+ of each allocation. + * The +generation+ is the value of the GC.count method at the time + * the allocation was made. 
+ * + * This functionality is only available in mwrap 2.0.0+ + */ +static VALUE src_loc_each(VALUE self) +{ + struct src_loc *l = src_loc_of(self); + + assert(locating == 0 && "forgot to clear locating"); + ++locating; + rcu_read_lock(); + rb_ensure(src_loc_each_i, (VALUE)l, rcu_unlock_ensure, 0); + return self; +} + +/* + * The the mean lifespan (in GC generations) of allocations made from this + * location. This does not account for live allocations. + */ +static VALUE src_loc_mean_lifespan(VALUE self) +{ + struct src_loc *l = src_loc_of(self); + size_t tot, frees; + + frees = uatomic_read(&l->frees); + tot = uatomic_read(&l->age_total); + return DBL2NUM(frees ? ((double)tot/(double)frees) : HUGE_VAL); +} + +/* The number of frees made from this location */ +static VALUE src_loc_frees(VALUE self) +{ + return SIZET2NUM(uatomic_read(&src_loc_of(self)->frees)); +} + +/* The number of allocations made from this location */ +static VALUE src_loc_allocations(VALUE self) +{ + return SIZET2NUM(uatomic_read(&src_loc_of(self)->allocations)); +} + +/* The total number of bytes allocated from this location */ +static VALUE src_loc_total(VALUE self) +{ + return SIZET2NUM(uatomic_read(&src_loc_of(self)->total)); +} + +/* + * The maximum age (in GC generations) of an allocation before it was freed. + * This does not account for live allocations. + */ +static VALUE src_loc_max_lifespan(VALUE self) +{ + return SIZET2NUM(uatomic_read(&src_loc_of(self)->max_lifespan)); +} + +/* + * Returns a frozen String location of the given SourceLocation object. 
+ */ +static VALUE src_loc_name(VALUE self) +{ + struct src_loc *l = src_loc_of(self); + VALUE ret; + + ++locating; + ret = location_string(l); + --locating; + return ret; +} + +static VALUE reset_locating(VALUE ign) { --locating; return Qfalse; } + +/* + * call-seq: + * + * Mwrap.quiet do |depth| + * # expensive sort/calculate/emitting results of Mwrap.each + * # affecting statistics of the rest of the app + * end + * + * Stops allocation tracking inside the block. This is useful for + * monitoring code which calls other Mwrap (or ObjectSpace/GC) + * functions which unavoidably allocate memory. + * + * This feature was added in mwrap 2.0.0+ + */ +static VALUE mwrap_quiet(VALUE mod) +{ + size_t cur = ++locating; + return rb_ensure(rb_yield, SIZET2NUM(cur), reset_locating, 0); +} + +/* + * total bytes allocated as tracked by mwrap + */ +static VALUE total_inc(VALUE mod) +{ + return SIZET2NUM(total_bytes_inc); +} + +/* + * total bytes freed as tracked by mwrap + */ +static VALUE total_dec(VALUE mod) +{ + return SIZET2NUM(total_bytes_dec); +} + +/* + * Document-module: Mwrap + * + * require 'mwrap' + * + * Mwrap has a dual function as both a Ruby C extension and LD_PRELOAD + * wrapper. As a Ruby C extension, it exposes a limited Ruby API. + * To be effective at gathering status, mwrap must be loaded as a + * LD_PRELOAD (using the mwrap(1) executable makes it easy) + * + * ENVIRONMENT + * + * The "MWRAP" environment variable contains a comma-delimited list + * of key:value options for automatically dumping at program exit. + * + * * dump_fd: a writable FD to dump to + * * dump_path: a path to dump to, the file is opened in O_APPEND mode + * * dump_min: the minimum allocation size (total) to dump + * + * If both `dump_fd' and `dump_path' are specified, dump_path takes + * precedence. 
+ */ +void Init_mwrap(void) +{ + VALUE mod; + + ++locating; + mod = rb_define_module("Mwrap"); + id_uminus = rb_intern("-@"); + + /* + * Represents a location in source code or library + * address which calls a memory allocation. It is + * updated automatically as allocations are made, so + * there is no need to reload or reread it from Mwrap#[]. + * This class is only available since mwrap 2.0.0+. + */ + cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject); + rb_undef_alloc_func(cSrcLoc); + rb_define_singleton_method(mod, "dump", mwrap_dump, -1); + rb_define_singleton_method(mod, "reset", reset_m, 0); + rb_define_singleton_method(mod, "clear", reset_m, 0); + rb_define_singleton_method(mod, "each", mwrap_each, -1); + rb_define_singleton_method(mod, "[]", mwrap_aref, 1); + rb_define_singleton_method(mod, "quiet", mwrap_quiet, 0); + rb_define_singleton_method(mod, "total_bytes_allocated", total_inc, 0); + rb_define_singleton_method(mod, "total_bytes_freed", total_dec, 0); + + + rb_define_method(cSrcLoc, "each", src_loc_each, 0); + rb_define_method(cSrcLoc, "frees", src_loc_frees, 0); + rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0); + rb_define_method(cSrcLoc, "total", src_loc_total, 0); + rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0); + rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0); + rb_define_method(cSrcLoc, "name", src_loc_name, 0); + + --locating; +} |