mwrap.git  about / heads / tags
LD_PRELOAD malloc wrapper + line stats for Ruby
blob 826ca929027c3f8b9ea59a6868700c877de23a2b 9996 bytes (raw)
$ git show HEAD:ext/mwrap/mwrap.c	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
 
/*
 * Copyright (C) mwrap hackers <mwrap-public@80x24.org>
 * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
 */
#define MWRAP_RUBY 1
#include "mwrap_core.h"

/* Method ID for "-@"; interned in Init_mwrap and called by location_string */
static ID id_uminus;
/*
 * These Ruby symbols are declared weak.  NOTE(review): presumably so that
 * they need not resolve when Ruby is absent (mwrap_dump notes the library
 * may be linked w/o Ruby) -- confirm against the build setup.
 */
extern VALUE __attribute__((weak)) rb_stderr;
extern VALUE __attribute__((weak)) rb_cObject;
extern VALUE __attribute__((weak)) rb_eTypeError;
extern VALUE __attribute__((weak)) rb_yield(VALUE);

/*
 * call-seq:
 *
 *	Mwrap.dump([io [, min]]) -> nil
 *
 * Writes the current totals to +io+, which has to be a real IO object
 * (StringIO and similar are not supported).  +io+ defaults to $stderr.
 * Entries whose total size is smaller than or equal to +min+ are
 * skipped; +min+ defaults to zero.
 *
 * The output is space-delimited by 3 columns:
 *
 * total_size      call_count      location
 */
static VALUE mwrap_dump(int argc, VALUE *argv, VALUE mod)
{
	struct dump_arg arg = { .dump_csv = false };
	rb_io_t *fptr;
	VALUE io, min;

	rb_scan_args(argc, argv, "02", &io, &min);

	/* library may be linked w/o Ruby */
	if (NIL_P(io))
		io = rb_stderr;

	if (NIL_P(min))
		arg.min = 0;
	else
		arg.min = NUM2SIZET(min);

	/* coerce the argument to an IO, then take the IO it writes through */
	io = rb_io_get_write_io(rb_io_get_io(io));
	GetOpenFile(io, fptr);
	arg.fp = rb_io_stdio_file(fptr);

	/* the dump itself runs with the GVL released */
	rb_thread_call_without_gvl((void *(*)(void *))dump_to_file, &arg,
				   NULL, NULL);
	RB_GC_GUARD(io);
	return Qnil;
}

/*
 * Adapter giving mwrap_reset() the signature expected by
 * rb_thread_call_without_gvl.  The whole operation is not remotely atomic...
 */
static void *totals_reset(void *ign)
{
	(void)ign; /* unused */
	mwrap_reset();

	return NULL;
}

/*
 * call-seq:
 *
 *	Mwrap.reset -> nil
 *
 * Resets the total tables by zero-ing all counters, which clears
 * all statistics.  This is not an atomic operation as other threads
 * (outside of GVL) may increment counters while it runs.
 */
static VALUE reset_m(VALUE mod)
{
	/* the reset is performed with the GVL released */
	rb_thread_call_without_gvl(totals_reset, NULL, NULL, NULL);

	return Qnil;
}

/*
 * rb_ensure cleanup callback: leaves the RCU read-side section and then
 * undoes the `locating' increment made by the caller of rb_ensure.
 */
static VALUE rcu_unlock_ensure(VALUE ignored)
{
	(void)ignored; /* unused */

	rcu_read_unlock();
	locating--;

	return Qfalse;
}

static VALUE location_string(const struct src_loc *l)
{
	VALUE tmp = rb_str_new(NULL, 0);

	if (l->f) {
		rb_str_cat(tmp, l->f->fn, l->f->fn_len);
		if (l->lineno == U24_MAX)
			rb_str_cat_cstr(tmp, ":-");
		else
			rb_str_catf(tmp, ":%u", l->lineno);
	}
	if (l->bt_len) {
		AUTO_FREE char **s = bt_syms(l->bt, l->bt_len);

		if (s) {
			if (l->f)
				rb_str_cat_cstr(tmp, "\n");
			rb_str_cat_cstr(tmp, s[0]);
			for (uint32_t i = 1; i < l->bt_len; ++i)
				rb_str_catf(tmp, "\n%s", s[i]);
		}
	}

	/* deduplicate and try to free up some memory */
	VALUE ret = rb_funcall(tmp, id_uminus, 0);
	if (!OBJ_FROZEN_RAW(tmp))
		rb_str_resize(tmp, 0);

	return ret;
}

/*
 * rb_ensure body for Mwrap.each: walks the shared totals table and yields
 * six values (location, total, allocations, frees, age_total,
 * max_lifespan) for every entry whose total exceeds the minimum.
 * +x+ is a `struct dump_arg *' smuggled through a VALUE.
 */
static VALUE dump_each_rcu(VALUE x)
{
	const struct dump_arg *arg = (struct dump_arg *)x;
	struct cds_lfht *tbl = CMM_LOAD_SHARED(totals);
	struct cds_lfht_iter iter;
	struct src_loc *loc;

	cds_lfht_for_each_entry(tbl, &iter, loc, hnode) {
		if (loc->total > arg->min) {
			VALUE row[6];

			row[0] = location_string(loc);
			row[1] = SIZET2NUM(loc->total);
			row[2] = SIZET2NUM(loc->allocations);
			row[3] = SIZET2NUM(loc->frees);
			row[4] = SIZET2NUM(loc->age_total);
			row[5] = SIZET2NUM(loc->max_lifespan);

			rb_yield_values2(6, row);
			/* the block must not have left the read-side section */
			assert(rcu_read_ongoing());
		}
	}

	return Qnil;
}

/*
 * call-seq:
 *
 *	Mwrap.each([min]) do |location,total,allocations,frees,age_total,max_lifespan|
 *	  ...
 *	end
 *
 * Yields each entry of the table to a caller-supplied block.
 * +min+ may be specified to filter out lines with +total+ bytes
 * equal-to-or-smaller-than the supplied minimum.
 */
static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
{
	struct dump_arg a;
	VALUE min;
	VALUE ret;

	rb_scan_args(argc, argv, "01", &min);
	if (NIL_P(min))
		a.min = 0;
	else
		a.min = NUM2SIZET(min);

	/* both steps are undone by rcu_unlock_ensure, even on exceptions */
	++locating;
	rcu_read_lock();
	ret = rb_ensure(dump_each_rcu, (VALUE)&a, rcu_unlock_ensure, 0);

	return ret;
}

/* Size callback for src_loc_type: always the size of one struct src_loc. */
static size_t src_loc_memsize(const void *p)
{
	(void)p; /* the size does not depend on the instance */

	return sizeof(struct src_loc);
}

static const rb_data_type_t src_loc_type = {
	"source_location",
	/* no marking, no freeing */
	{ 0, 0, src_loc_memsize, /* reserved */ },
	/* parent, data, [ flags ] */
};

static VALUE cSrcLoc;

/*
 * call-seq:
 *	Mwrap[location] -> Mwrap::SourceLocation
 *
 * Looks up the Mwrap::SourceLocation associated with the +location+
 * String and returns it, or +nil+ if there is none.  +location+ is
 * either a Ruby source location path:line (e.g. "/path/to/foo.rb:5")
 * or a hexadecimal memory address with square-braces part yielded by
 * Mwrap.dump (e.g. "[0xdeadbeef]")
 */
static VALUE mwrap_aref(VALUE mod, VALUE loc)
{
	const char *name = StringValueCStr(loc);
	long name_len = RSTRING_LEN(loc);
	struct src_loc *found;

	assert(name_len >= 0);
	found = mwrap_get(name, (size_t)name_len);
	if (!found)
		return Qnil;

	return TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, found);
}

/*
 * rb_ensure body for SourceLocation#each: walks the allocation list of
 * the `struct src_loc *' passed in +p+ and yields (size, generation)
 * for every entry with a non-zero size.
 */
static VALUE src_loc_each_i(VALUE p)
{
	struct src_loc *loc = (struct src_loc *)p;
	struct alloc_hdr *hdr;

	cds_list_for_each_entry_rcu(hdr, &loc->allocs, anode) {
		size_t gen = uatomic_read(&hdr->as.live.gen);
		size_t size = uatomic_read(&hdr->size);
		VALUE pair[2];

		/* entries with a zero size are not reported */
		if (!size)
			continue;

		pair[0] = SIZET2NUM(size);
		pair[1] = SIZET2NUM(gen);
		rb_yield_values2(2, pair);
	}

	return Qfalse;
}

/*
 * Unwraps the `struct src_loc' held by a Mwrap::SourceLocation object,
 * type-checked against src_loc_type.  The pointer is asserted non-NULL.
 */
static struct src_loc *src_loc_of(VALUE self)
{
	struct src_loc *loc;

	TypedData_Get_Struct(self, struct src_loc, &src_loc_type, loc);
	assert(loc);

	return loc;
}

/*
 * call-seq:
 *	loc = Mwrap[location]
 *	loc.each { |size,generation| ... }
 *
 * Iterates through live allocations for a given Mwrap::SourceLocation,
 * yielding the +size+ (in bytes) and +generation+ of each allocation.
 * The +generation+ is the value of the GC.count method at the time
 * the allocation was made.
 *
 * This method may be called from inside a Mwrap.quiet or Mwrap.each
 * block.  Returns +self+.
 *
 * This functionality is only available in mwrap 2.0.0+
 */
static VALUE src_loc_each(VALUE self)
{
	struct src_loc *l = src_loc_of(self);

	/*
	 * `locating' is a nesting counter, not a flag: Mwrap.quiet and
	 * Mwrap.each both yield to Ruby code while it is non-zero, so it
	 * must not be asserted to be zero on entry.  It is only required
	 * that the increment below is undone, which rcu_unlock_ensure
	 * guarantees even if the block raises.
	 */
	++locating;
	rcu_read_lock();
	rb_ensure(src_loc_each_i, (VALUE)l, rcu_unlock_ensure, 0);
	return self;
}

/*
 * The mean lifespan (in GC generations) of allocations made from this
 * location.  This does not account for live allocations.
 * The result is infinite when nothing has been freed yet.
 */
static VALUE src_loc_mean_lifespan(VALUE self)
{
	struct src_loc *l = src_loc_of(self);
	size_t frees = uatomic_read(&l->frees);
	size_t age_total = uatomic_read(&l->age_total);
	double mean = HUGE_VAL;

	/* avoid dividing by zero when there were no frees */
	if (frees)
		mean = (double)age_total / (double)frees;

	return DBL2NUM(mean);
}

/* The number of frees made from this location */
static VALUE src_loc_frees(VALUE self)
{
	struct src_loc *l = src_loc_of(self);

	return SIZET2NUM(uatomic_read(&l->frees));
}

/* The number of allocations made from this location */
static VALUE src_loc_allocations(VALUE self)
{
	struct src_loc *l = src_loc_of(self);

	return SIZET2NUM(uatomic_read(&l->allocations));
}

/* The total number of bytes allocated from this location */
static VALUE src_loc_total(VALUE self)
{
	struct src_loc *l = src_loc_of(self);

	return SIZET2NUM(uatomic_read(&l->total));
}

/*
 * The maximum age (in GC generations) an allocation from this location
 * reached before it was freed.  This does not account for live allocations.
 */
static VALUE src_loc_max_lifespan(VALUE self)
{
	struct src_loc *l = src_loc_of(self);

	return SIZET2NUM(uatomic_read(&l->max_lifespan));
}

/* rb_ensure cleanup callback: undoes one `++locating' */
static VALUE reset_locating(VALUE ign) { --locating; return Qfalse; }

/* rb_ensure body for src_loc_name; +p+ is a `struct src_loc *' */
static VALUE src_loc_name_i(VALUE p)
{
	return location_string((const struct src_loc *)p);
}

/*
 * Returns a frozen String location of the given SourceLocation object.
 */
static VALUE src_loc_name(VALUE self)
{
	struct src_loc *l = src_loc_of(self);

	/*
	 * location_string() calls into Ruby and may raise (e.g. on memory
	 * exhaustion); rb_ensure guarantees `locating' is decremented on
	 * that path too, so the counter cannot be left stuck above zero.
	 */
	++locating;
	return rb_ensure(src_loc_name_i, (VALUE)l, reset_locating, 0);
}

/*
 * call-seq:
 *
 *	Mwrap.quiet do |depth|
 *	  # expensive sort/calculate/emitting results of Mwrap.each
 *	  # affecting statistics of the rest of the app
 *	end
 *
 * Stops allocation tracking inside the block.  This is useful for
 * monitoring code which calls other Mwrap (or ObjectSpace/GC)
 * functions which unavoidably allocate memory.  The block receives
 * the current nesting +depth+.
 *
 * This feature was added in mwrap 2.0.0+
 */
static VALUE mwrap_quiet(VALUE mod)
{
	size_t depth = ++locating;
	VALUE yielded = SIZET2NUM(depth);

	/* reset_locating undoes the increment even if the block raises */
	return rb_ensure(rb_yield, yielded, reset_locating, 0);
}

/*
 * total bytes allocated as tracked by mwrap
 */
static VALUE total_inc(VALUE mod)
{
	const VALUE allocated = SIZET2NUM(total_bytes_inc);

	return allocated;
}

/*
 * total bytes freed as tracked by mwrap
 */
static VALUE total_dec(VALUE mod)
{
	const VALUE freed = SIZET2NUM(total_bytes_dec);

	return freed;
}

/*
 * Document-module: Mwrap
 *
 *   require 'mwrap'
 *
 * Mwrap has a dual function as both a Ruby C extension and LD_PRELOAD
 * wrapper.  As a Ruby C extension, it exposes a limited Ruby API.
 * To be effective at gathering status, mwrap must be loaded as a
 * LD_PRELOAD (using the mwrap(1) executable makes it easy)
 *
 * ENVIRONMENT
 *
 * The "MWRAP" environment variable contains a comma-delimited list
 * of key:value options for automatically dumping at program exit.
 *
 * * dump_fd: a writable FD to dump to
 * * dump_path: a path to dump to, the file is opened in O_APPEND mode
 * * dump_min: the minimum allocation size (total) to dump
 *
 * If both `dump_fd' and `dump_path' are specified, dump_path takes
 * precedence.
 */
void Init_mwrap(void)
{
	VALUE mMwrap;

	/* `locating' is held non-zero for the duration of the setup below */
	locating++;
	id_uminus = rb_intern("-@");
	mMwrap = rb_define_module("Mwrap");

	/*
	 * Represents a location in source code or library
	 * address which calls a memory allocation.  It is
	 * updated automatically as allocations are made, so
	 * there is no need to reload or reread it from Mwrap#[].
	 * This class is only available since mwrap 2.0.0+.
	 */
	cSrcLoc = rb_define_class_under(mMwrap, "SourceLocation", rb_cObject);
	/* instances are only created by wrapping an existing src_loc */
	rb_undef_alloc_func(cSrcLoc);

	/* module-level API; "clear" shares its implementation with "reset" */
	rb_define_singleton_method(mMwrap, "dump", mwrap_dump, -1);
	rb_define_singleton_method(mMwrap, "reset", reset_m, 0);
	rb_define_singleton_method(mMwrap, "clear", reset_m, 0);
	rb_define_singleton_method(mMwrap, "each", mwrap_each, -1);
	rb_define_singleton_method(mMwrap, "[]", mwrap_aref, 1);
	rb_define_singleton_method(mMwrap, "quiet", mwrap_quiet, 0);
	rb_define_singleton_method(mMwrap, "total_bytes_allocated", total_inc, 0);
	rb_define_singleton_method(mMwrap, "total_bytes_freed", total_dec, 0);

	/* Mwrap::SourceLocation instance methods */
	rb_define_method(cSrcLoc, "each", src_loc_each, 0);
	rb_define_method(cSrcLoc, "frees", src_loc_frees, 0);
	rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0);
	rb_define_method(cSrcLoc, "total", src_loc_total, 0);
	rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0);
	rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0);
	rb_define_method(cSrcLoc, "name", src_loc_name, 0);

	locating--;
}

git clone https://80x24.org/mwrap.git