mwrap user+dev discussion/patches/pulls/bugs/help
 help / color / Atom feed
* [PATCH 0/19] the heavy version of mwrap
@ 2018-07-16 21:19 Eric Wong
  2018-07-16 21:19 ` [PATCH 01/19] support per-allocation headers for per-alloc tracking Eric Wong
                   ` (18 more replies)
  0 siblings, 19 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

TL;DR: live demo of the new features running inside a Rack app:

  https://80x24.org/MWRAP/each/2000

The following changes since commit 834de3bc0da4af53535d5c9d4975e546df9fb186:

  bin/mwrap: support LISTEN_FDS env from systemd (2018-07-16 19:33:12 +0000)

are available in the Git repository at:

  https://80x24.org/mwrap.git heavy

for you to fetch changes up to c432e3ad30aa247dbac8575af87b0c594365d3fd:

  mwrap_rack: Rack app to track live allocations (2018-07-16 21:14:13 +0000)

----------------------------------------------------------------
Eric Wong (19):
      support per-allocation headers for per-alloc tracking
      mwrap: use malloc to do our own memalign
      hold RCU read lock to insert each allocation
      realloc: do not copy if allocation failed
      internal_memalign: do not assume real_malloc succeeds
      ensure ENOMEM is preserved in errno when appropriate
      memalign: check alignment on all public functions
      reduce stack usage from file names
      resolve real_malloc earlier for C++ programs
      allow analyzing live allocations via Mwrap[location]
      alias Mwrap.clear to Mwrap.reset
      implement accessors for SourceLocation
      mwrap_aref: quiet -Wshorten-64-to-32 warning
      fixes for FreeBSD 11.1...
      use memrchr to extract address under glibc
      do not track allocations for constructor and Init_
      disable memalign tracking by default
      support Mwrap.quiet to temporarily disable allocation tracking
      mwrap_rack: Rack app to track live allocations

 ext/mwrap/extconf.rb |  15 +
 ext/mwrap/mwrap.c    | 792 +++++++++++++++++++++++++++++++++++++++++++--------
 lib/mwrap_rack.rb    | 105 +++++++
 test/test_mwrap.rb   | 113 ++++++++
 4 files changed, 901 insertions(+), 124 deletions(-)
 create mode 100644 lib/mwrap_rack.rb



^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 01/19] support per-allocation headers for per-alloc tracking
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 02/19] mwrap: use malloc to do our own memalign Eric Wong
                   ` (17 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

This increases costs even more, but will allow leak finding.
It will be made optional in the future.
---
 ext/mwrap/extconf.rb |  15 +++
 ext/mwrap/mwrap.c    | 312 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 292 insertions(+), 35 deletions(-)

diff --git a/ext/mwrap/extconf.rb b/ext/mwrap/extconf.rb
index 4ac8881..e9dbb1e 100644
--- a/ext/mwrap/extconf.rb
+++ b/ext/mwrap/extconf.rb
@@ -10,4 +10,19 @@ have_library 'urcu-bp' or abort 'liburcu-bp not found'
 have_library 'dl'
 have_library 'c'
 have_library 'execinfo' # FreeBSD
+
+if try_link(<<'')
+int main(void) { return __builtin_add_overflow_p(0,0,(int)1); }
+
+  $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
+end
+
+if try_link(<<'')
+int main(int a) { return __builtin_add_overflow(0,0,&a); }
+
+  $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
+else
+  abort 'missing __builtin_add_overflow'
+end
+
 create_makefile 'mwrap'
diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index c160e33..2e75d8f 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -16,14 +16,21 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <pthread.h>
 #include <urcu-bp.h>
 #include <urcu/rculfhash.h>
+#include <urcu/rculist.h>
 #include "jhash.h"
 
 static ID id_uminus;
 const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
 extern int __attribute__((weak)) ruby_thread_has_gvl_p(void);
 extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
+extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
+extern size_t __attribute__((weak)) rb_gc_count(void);
+
+/* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */
+#define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2)
 
 int __attribute__((weak)) ruby_thread_has_gvl_p(void)
 {
@@ -32,17 +39,17 @@ int __attribute__((weak)) ruby_thread_has_gvl_p(void)
 
 #ifdef __FreeBSD__
 void *__malloc(size_t);
-void *__calloc(size_t, size_t);
-void *__realloc(void *, size_t);
+void *__memalign(size_t, size_t);
+void __free(void *);
 static void *(*real_malloc)(size_t) = __malloc;
-static void *(*real_calloc)(size_t, size_t) = __calloc;
-static void *(*real_realloc)(void *, size_t) = __realloc;
+static void *(*real_memalign)(size_t, size_t) = __aligned_alloc;
+static void (*real_free)(void *) = __free;
 #  define RETURN_IF_NOT_READY() do {} while (0) /* nothing */
 #else
 static int ready;
 static void *(*real_malloc)(size_t);
-static void *(*real_calloc)(size_t, size_t);
-static void *(*real_realloc)(void *, size_t);
+static void *(*real_memalign)(size_t, size_t);
+static void (*real_free)(void *);
 
 /*
  * we need to fake an OOM condition while dlsym is running,
@@ -58,7 +65,26 @@ static void *(*real_realloc)(void *, size_t);
 
 #endif /* !FreeBSD */
 
+static size_t generation;
+static size_t page_size;
 static struct cds_lfht *totals;
+union padded_mutex {
+	pthread_mutex_t mtx;
+	char pad[64];
+};
+
+/* a round-robin pool of mutexes */
+#define MUTEX_NR   (1 << 6)
+#define MUTEX_MASK (MUTEX_NR - 1)
+static size_t mutex_i;
+static union padded_mutex mutexes[MUTEX_NR] = {
+	[0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
+};
+
+static pthread_mutex_t *mutex_assign(void)
+{
+	return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
+}
 
 static struct cds_lfht *
 lfht_new(void)
@@ -72,16 +98,16 @@ __attribute__((constructor)) static void resolve_malloc(void)
 
 #ifndef __FreeBSD__
 	real_malloc = dlsym(RTLD_NEXT, "malloc");
-	real_calloc = dlsym(RTLD_NEXT, "calloc");
-	real_realloc = dlsym(RTLD_NEXT, "realloc");
-	if (!real_calloc || !real_malloc || !real_realloc) {
-		fprintf(stderr, "missing calloc/malloc/realloc %p %p %p\n",
-			real_calloc, real_malloc, real_realloc);
+	real_memalign = dlsym(RTLD_NEXT, "aligned_alloc");
+	real_free = dlsym(RTLD_NEXT, "free");
+	if (!real_malloc || !real_memalign || !real_free) {
+		fprintf(stderr, "missing malloc/aligned_alloc/free\n"
+			"\t%p %p %p\n",
+			real_malloc, real_memalign, real_free);
 		_exit(1);
 	}
 	ready = 1;
 #endif
-
 	totals = lfht_new();
 	if (!totals)
 		fprintf(stderr, "failed to allocate totals table\n");
@@ -91,6 +117,21 @@ __attribute__((constructor)) static void resolve_malloc(void)
 				call_rcu_after_fork_child);
 	if (err)
 		fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
+	page_size = sysconf(_SC_PAGESIZE);
+}
+
+static void
+mutex_lock(pthread_mutex_t *m)
+{
+	int err = pthread_mutex_lock(m);
+	assert(err == 0);
+}
+
+static void
+mutex_unlock(pthread_mutex_t *m)
+{
+	int err = pthread_mutex_unlock(m);
+	assert(err == 0);
 }
 
 #ifndef HAVE_MEMPCPY
@@ -142,19 +183,47 @@ static char *int2str(int num, char *dst, size_t * size)
  */
 static int has_ec_p(void)
 {
-	return (ruby_thread_has_gvl_p() && ruby_current_execution_context_ptr);
+	return (ruby_thread_has_gvl_p() && ruby_current_vm_ptr &&
+		ruby_current_execution_context_ptr);
 }
 
+/* allocated via real_malloc/real_free */
 struct src_loc {
 	struct rcu_head rcu_head;
+	pthread_mutex_t *mtx;
 	size_t calls;
 	size_t total;
 	struct cds_lfht_node hnode;
+	struct cds_list_head allocs; /* <=> alloc_hdr.node */
 	uint32_t hval;
 	uint32_t capa;
 	char k[];
 };
 
+/* every allocation has this in the header, maintain alignment with malloc  */
+struct alloc_hdr {
+	struct cds_list_head anode; /* <=> src_loc.allocs */
+	union {
+		struct {
+			size_t gen; /* rb_gc_count() */
+			struct src_loc *loc;
+		} live;
+		struct rcu_head dead;
+	} as;
+	void *real; /* what to call real_free on */
+	size_t size;
+};
+
+static struct alloc_hdr *ptr2hdr(void *p)
+{
+	return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
+}
+
+static void *hdr2ptr(struct alloc_hdr *h)
+{
+	return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
+}
+
 static int loc_is_addr(const struct src_loc *l)
 {
 	return l->capa == 0;
@@ -177,14 +246,13 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
 		memcmp(k->k, existing->k, loc_size(k)) == 0);
 }
 
-static void totals_add(struct src_loc *k)
+static struct src_loc *totals_add(struct src_loc *k)
 {
 	struct cds_lfht_iter iter;
 	struct cds_lfht_node *cur;
-	struct src_loc *l;
+	struct src_loc *l = 0;
 	struct cds_lfht *t;
 
-
 again:
 	rcu_read_lock();
 	t = rcu_dereference(totals);
@@ -197,25 +265,27 @@ again:
 		uatomic_add(&l->calls, 1);
 	} else {
 		size_t n = loc_size(k);
-		l = malloc(sizeof(*l) + n);
+		l = real_malloc(sizeof(*l) + n);
 		if (!l) goto out_unlock;
-
 		memcpy(l, k, sizeof(*l) + n);
+		l->mtx = mutex_assign();
 		l->calls = 1;
+		CDS_INIT_LIST_HEAD(&l->allocs);
 		cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
 		if (cur != &l->hnode) { /* lost race */
 			rcu_read_unlock();
-			free(l);
+			real_free(l);
 			goto again;
 		}
 	}
 out_unlock:
 	rcu_read_unlock();
+	return l;
 }
 
-static void update_stats(size_t size, uintptr_t caller)
+static struct src_loc *update_stats(size_t size, uintptr_t caller)
 {
-	struct src_loc *k;
+	struct src_loc *k, *ret = 0;
 	static const size_t xlen = sizeof(caller);
 	char *dst;
 
@@ -227,6 +297,8 @@ static void update_stats(size_t size, uintptr_t caller)
 		size_t len;
 		size_t int_size = INT2STR_MAX;
 
+		generation = rb_gc_count();
+
 		if (!ptr) goto unknown;
 
 		/* avoid vsnprintf or anything which could call malloc here: */
@@ -240,7 +312,7 @@ static void update_stats(size_t size, uintptr_t caller)
 			*dst = 0;	/* terminate string */
 			k->capa = (uint32_t)(dst - k->k + 1);
 			k->hval = jhash(k->k, k->capa, 0xdeadbeef);
-			totals_add(k);
+			ret = totals_add(k);
 		} else {
 			rb_bug("bad math making key from location %s:%d\n",
 				ptr, line);
@@ -252,36 +324,206 @@ unknown:
 		memcpy(k->k, &caller, xlen);
 		k->capa = 0;
 		k->hval = jhash(k->k, xlen, 0xdeadbeef);
-		totals_add(k);
+		ret = totals_add(k);
 	}
 out:
 	--locating;
+	return ret;
+}
+
+size_t malloc_usable_size(void *p)
+{
+	return ptr2hdr(p)->size;
+}
+
+static void
+free_hdr_rcu(struct rcu_head *dead)
+{
+	struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
+	real_free(h->real);
+}
+
+void free(void *p)
+{
+	if (p) {
+		struct alloc_hdr *h = ptr2hdr(p);
+		if (h->as.live.loc) {
+			h->size = 0;
+			mutex_lock(h->as.live.loc->mtx);
+			cds_list_del_rcu(&h->anode);
+			mutex_unlock(h->as.live.loc->mtx);
+			call_rcu(&h->as.dead, free_hdr_rcu);
+		}
+		else {
+			real_free(h->real);
+		}
+	}
+}
+
+static void
+alloc_insert(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
+{
+	if (!h) return;
+	h->size = size;
+	h->real = real;
+	h->as.live.loc = l;
+	h->as.live.gen = generation;
+	if (l) {
+		mutex_lock(l->mtx);
+		cds_list_add_rcu(&h->anode, &l->allocs);
+		mutex_unlock(l->mtx);
+	}
+}
+
+static size_t size_align(size_t size, size_t alignment)
+{
+	return ((size + (alignment - 1)) & ~(alignment - 1));
+}
+
+static void *internal_memalign(size_t alignment, size_t size, uintptr_t caller)
+{
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	void *p, *real;
+	size_t asize;
+
+	RETURN_IF_NOT_READY();
+	if (alignment <= ASSUMED_MALLOC_ALIGNMENT)
+		return malloc(size);
+	for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
+		; /* double alignment until >= sizeof(struct alloc_hdr) */
+	if (__builtin_add_overflow(size, alignment, &asize)) {
+		errno = ENOMEM;
+		return 0;
+	}
+	l = update_stats(size, caller);
+	real = real_memalign(alignment, asize);
+	p = (void *)((uintptr_t)real + alignment);
+	h = (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
+	alloc_insert(l, h, size, real);
+
+	return p;
+}
+
+void *memalign(size_t alignment, size_t size)
+{
+	return internal_memalign(alignment, size, RETURN_ADDRESS(0));
+}
+
+static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
+
+int posix_memalign(void **p, size_t alignment, size_t size)
+{
+	size_t d = alignment / sizeof(void*);
+	size_t r = alignment % sizeof(void*);
+
+	if (r != 0 || d == 0 || !is_power_of_two(d))
+		return EINVAL;
+
+	*p = internal_memalign(alignment, size, RETURN_ADDRESS(0));
+	return *p ? 0 : ENOMEM;
+}
+
+void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
+void cfree(void *) __attribute__((alias("free")));
+
+void *valloc(size_t size)
+{
+	return internal_memalign(page_size, size, RETURN_ADDRESS(0));
+}
+
+#if __GNUC__ < 7
+#  define add_overflow_p(a,b) __extension__({ \
+		__typeof__(a) _c; \
+		__builtin_add_overflow(a,b,&_c); \
+	})
+#else
+#  define add_overflow_p(a,b) \
+		__builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
+#endif
+
+void *pvalloc(size_t size)
+{
+	size_t alignment = page_size;
+
+	if (add_overflow_p(size, alignment)) {
+		errno = ENOMEM;
+		return 0;
+	}
+	size = size_align(size, alignment);
+	return internal_memalign(alignment, size, RETURN_ADDRESS(0));
 }
 
-/*
- * Do we care for *memalign? ruby/gc.c uses it in ways this lib
- * doesn't care about, but maybe some gems use it, too.
- */
 void *malloc(size_t size)
 {
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	size_t asize;
+
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+		errno = ENOMEM;
+		return 0;
+	}
 	RETURN_IF_NOT_READY();
-	update_stats(size, RETURN_ADDRESS(0));
-	return real_malloc(size);
+	l = update_stats(size, RETURN_ADDRESS(0));
+	h = real_malloc(asize);
+	if (!h) return 0;
+	alloc_insert(l, h, size, h);
+	return hdr2ptr(h);
 }
 
 void *calloc(size_t nmemb, size_t size)
 {
+	void *p;
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	size_t asize;
+
+	if (__builtin_mul_overflow(size, nmemb, &size)) {
+		errno = ENOMEM;
+		return 0;
+	}
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+		errno = ENOMEM;
+		return 0;
+	}
 	RETURN_IF_NOT_READY();
-	/* ruby_xcalloc already does overflow checking */
-	update_stats(nmemb * size, RETURN_ADDRESS(0));
-	return real_calloc(nmemb, size);
+	l = update_stats(size, RETURN_ADDRESS(0));
+	h = real_malloc(asize);
+	if (!h) return 0;
+	alloc_insert(l, h, size, h);
+	p = hdr2ptr(h);
+	memset(p, 0, size);
+	return p;
 }
 
 void *realloc(void *ptr, size_t size)
 {
+	void *p;
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	size_t asize;
+
+	if (!size) {
+		free(ptr);
+		return 0;
+	}
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+		errno = ENOMEM;
+		return 0;
+	}
 	RETURN_IF_NOT_READY();
-	update_stats(size, RETURN_ADDRESS(0));
-	return real_realloc(ptr, size);
+	l = update_stats(size, RETURN_ADDRESS(0));
+	h = real_malloc(asize);
+	if (!h) return 0;
+	alloc_insert(l, h, size, h);
+	p = hdr2ptr(h);
+	if (ptr) {
+		struct alloc_hdr *old = ptr2hdr(ptr);
+		memcpy(p, ptr, old->size < size ? old->size : size);
+		free(ptr);
+	}
+	return p;
 }
 
 struct dump_arg {
@@ -360,7 +602,7 @@ static void
 free_src_loc(struct rcu_head *head)
 {
 	struct src_loc *l = caa_container_of(head, struct src_loc, rcu_head);
-	free(l);
+	real_free(l);
 }
 
 static void *totals_clear(void *ign)
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 02/19] mwrap: use malloc to do our own memalign
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
  2018-07-16 21:19 ` [PATCH 01/19] support per-allocation headers for per-alloc tracking Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 03/19] hold RCU read lock to insert each allocation Eric Wong
                   ` (16 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

This reduces portability concerns about the various names of
memalign-related functions and should let us save some memory by
not requiring the malloc implementation to align-away the data.
---
 ext/mwrap/mwrap.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 2e75d8f..0da7a68 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -39,16 +39,13 @@ int __attribute__((weak)) ruby_thread_has_gvl_p(void)
 
 #ifdef __FreeBSD__
 void *__malloc(size_t);
-void *__memalign(size_t, size_t);
 void __free(void *);
 static void *(*real_malloc)(size_t) = __malloc;
-static void *(*real_memalign)(size_t, size_t) = __aligned_alloc;
 static void (*real_free)(void *) = __free;
 #  define RETURN_IF_NOT_READY() do {} while (0) /* nothing */
 #else
 static int ready;
 static void *(*real_malloc)(size_t);
-static void *(*real_memalign)(size_t, size_t);
 static void (*real_free)(void *);
 
 /*
@@ -98,12 +95,10 @@ __attribute__((constructor)) static void resolve_malloc(void)
 
 #ifndef __FreeBSD__
 	real_malloc = dlsym(RTLD_NEXT, "malloc");
-	real_memalign = dlsym(RTLD_NEXT, "aligned_alloc");
 	real_free = dlsym(RTLD_NEXT, "free");
-	if (!real_malloc || !real_memalign || !real_free) {
+	if (!real_malloc || !real_free) {
 		fprintf(stderr, "missing malloc/aligned_alloc/free\n"
-			"\t%p %p %p\n",
-			real_malloc, real_memalign, real_free);
+			"\t%p %p\n", real_malloc, real_free);
 		_exit(1);
 	}
 	ready = 1;
@@ -380,6 +375,16 @@ static size_t size_align(size_t size, size_t alignment)
 	return ((size + (alignment - 1)) & ~(alignment - 1));
 }
 
+static bool ptr_is_aligned(void *ptr, size_t alignment)
+{
+	return ((uintptr_t)ptr & (alignment - 1)) == 0;
+}
+
+static void *ptr_align(void *ptr, size_t alignment)
+{
+	return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1));
+}
+
 static void *internal_memalign(size_t alignment, size_t size, uintptr_t caller)
 {
 	struct src_loc *l;
@@ -392,14 +397,18 @@ static void *internal_memalign(size_t alignment, size_t size, uintptr_t caller)
 		return malloc(size);
 	for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
 		; /* double alignment until >= sizeof(struct alloc_hdr) */
-	if (__builtin_add_overflow(size, alignment, &asize)) {
+	if (__builtin_add_overflow(size, alignment, &asize) ||
+	    __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize)) {
 		errno = ENOMEM;
 		return 0;
 	}
+	/* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
 	l = update_stats(size, caller);
-	real = real_memalign(alignment, asize);
-	p = (void *)((uintptr_t)real + alignment);
-	h = (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
+	real = real_malloc(asize);
+	p = hdr2ptr(real);
+	if (!ptr_is_aligned(p, alignment))
+		p = ptr_align(p, alignment);
+	h = ptr2hdr(p);
 	alloc_insert(l, h, size, real);
 
 	return p;
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 03/19] hold RCU read lock to insert each allocation
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
  2018-07-16 21:19 ` [PATCH 01/19] support per-allocation headers for per-alloc tracking Eric Wong
  2018-07-16 21:19 ` [PATCH 02/19] mwrap: use malloc to do our own memalign Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 04/19] realloc: do not copy if allocation failed Eric Wong
                   ` (15 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

We need to hold the RCU read lock to ensure the liveness
of the associated src_loc struct we'll be inserting into.
---
 ext/mwrap/mwrap.c | 68 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 25 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 0da7a68..8d8b19f 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -241,7 +241,7 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
 		memcmp(k->k, existing->k, loc_size(k)) == 0);
 }
 
-static struct src_loc *totals_add(struct src_loc *k)
+static struct src_loc *totals_add_rcu(struct src_loc *k)
 {
 	struct cds_lfht_iter iter;
 	struct cds_lfht_node *cur;
@@ -249,7 +249,6 @@ static struct src_loc *totals_add(struct src_loc *k)
 	struct cds_lfht *t;
 
 again:
-	rcu_read_lock();
 	t = rcu_dereference(totals);
 	if (!t) goto out_unlock;
 	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
@@ -270,20 +269,22 @@ again:
 		if (cur != &l->hnode) { /* lost race */
 			rcu_read_unlock();
 			real_free(l);
+			rcu_read_lock();
 			goto again;
 		}
 	}
 out_unlock:
-	rcu_read_unlock();
 	return l;
 }
 
-static struct src_loc *update_stats(size_t size, uintptr_t caller)
+static struct src_loc *update_stats_rcu(size_t size, uintptr_t caller)
 {
 	struct src_loc *k, *ret = 0;
 	static const size_t xlen = sizeof(caller);
 	char *dst;
 
+	assert(rcu_read_ongoing());
+
 	if (locating++) goto out; /* do not recurse into another *alloc */
 
 	if (has_ec_p()) {
@@ -307,7 +308,7 @@ static struct src_loc *update_stats(size_t size, uintptr_t caller)
 			*dst = 0;	/* terminate string */
 			k->capa = (uint32_t)(dst - k->k + 1);
 			k->hval = jhash(k->k, k->capa, 0xdeadbeef);
-			ret = totals_add(k);
+			ret = totals_add_rcu(k);
 		} else {
 			rb_bug("bad math making key from location %s:%d\n",
 				ptr, line);
@@ -319,7 +320,7 @@ unknown:
 		memcpy(k->k, &caller, xlen);
 		k->capa = 0;
 		k->hval = jhash(k->k, xlen, 0xdeadbeef);
-		ret = totals_add(k);
+		ret = totals_add_rcu(k);
 	}
 out:
 	--locating;
@@ -356,8 +357,10 @@ void free(void *p)
 }
 
 static void
-alloc_insert(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
+alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
 {
+	/* we need src_loc to remain alive for the duration of this call */
+	assert(rcu_read_ongoing());
 	if (!h) return;
 	h->size = size;
 	h->real = real;
@@ -403,13 +406,15 @@ static void *internal_memalign(size_t alignment, size_t size, uintptr_t caller)
 		return 0;
 	}
 	/* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
-	l = update_stats(size, caller);
+	rcu_read_lock();
+	l = update_stats_rcu(size, caller);
 	real = real_malloc(asize);
 	p = hdr2ptr(real);
 	if (!ptr_is_aligned(p, alignment))
 		p = ptr_align(p, alignment);
 	h = ptr2hdr(p);
-	alloc_insert(l, h, size, real);
+	alloc_insert_rcu(l, h, size, real);
+	rcu_read_unlock();
 
 	return p;
 }
@@ -468,17 +473,22 @@ void *malloc(size_t size)
 	struct src_loc *l;
 	struct alloc_hdr *h;
 	size_t asize;
+	void *p;
 
 	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
 		errno = ENOMEM;
 		return 0;
 	}
 	RETURN_IF_NOT_READY();
-	l = update_stats(size, RETURN_ADDRESS(0));
-	h = real_malloc(asize);
-	if (!h) return 0;
-	alloc_insert(l, h, size, h);
-	return hdr2ptr(h);
+	rcu_read_lock();
+	l = update_stats_rcu(size, RETURN_ADDRESS(0));
+	p = h = real_malloc(asize);
+	if (h) {
+		alloc_insert_rcu(l, h, size, h);
+		p = hdr2ptr(h);
+	}
+	rcu_read_unlock();
+	return p;
 }
 
 void *calloc(size_t nmemb, size_t size)
@@ -497,12 +507,15 @@ void *calloc(size_t nmemb, size_t size)
 		return 0;
 	}
 	RETURN_IF_NOT_READY();
-	l = update_stats(size, RETURN_ADDRESS(0));
-	h = real_malloc(asize);
-	if (!h) return 0;
-	alloc_insert(l, h, size, h);
-	p = hdr2ptr(h);
-	memset(p, 0, size);
+	rcu_read_lock();
+	l = update_stats_rcu(size, RETURN_ADDRESS(0));
+	p = h = real_malloc(asize);
+	if (p) {
+		alloc_insert_rcu(l, h, size, h);
+		p = hdr2ptr(h);
+		memset(p, 0, size);
+	}
+	rcu_read_unlock();
 	return p;
 }
 
@@ -522,11 +535,16 @@ void *realloc(void *ptr, size_t size)
 		return 0;
 	}
 	RETURN_IF_NOT_READY();
-	l = update_stats(size, RETURN_ADDRESS(0));
-	h = real_malloc(asize);
-	if (!h) return 0;
-	alloc_insert(l, h, size, h);
-	p = hdr2ptr(h);
+
+	rcu_read_lock();
+	l = update_stats_rcu(size, RETURN_ADDRESS(0));
+	p = h = real_malloc(asize);
+	if (p) {
+		alloc_insert_rcu(l, h, size, h);
+		p = hdr2ptr(h);
+	}
+	rcu_read_unlock();
+
 	if (ptr) {
 		struct alloc_hdr *old = ptr2hdr(ptr);
 		memcpy(p, ptr, old->size < size ? old->size : size);
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 04/19] realloc: do not copy if allocation failed
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (2 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 03/19] hold RCU read lock to insert each allocation Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 05/19] internal_memalign: do not assume real_malloc succeeds Eric Wong
                   ` (14 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

We shouldn't try to read NULL pointers :x
---
 ext/mwrap/mwrap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 8d8b19f..c0cf8ff 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -545,7 +545,7 @@ void *realloc(void *ptr, size_t size)
 	}
 	rcu_read_unlock();
 
-	if (ptr) {
+	if (ptr && p) {
 		struct alloc_hdr *old = ptr2hdr(ptr);
 		memcpy(p, ptr, old->size < size ? old->size : size);
 		free(ptr);
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 05/19] internal_memalign: do not assume real_malloc succeeds
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (3 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 04/19] realloc: do not copy if allocation failed Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 06/19] ensure ENOMEM is preserved in errno when appropriate Eric Wong
                   ` (13 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

Oops :x
---
 ext/mwrap/mwrap.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index c0cf8ff..fa52631 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -408,12 +408,14 @@ static void *internal_memalign(size_t alignment, size_t size, uintptr_t caller)
 	/* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
 	rcu_read_lock();
 	l = update_stats_rcu(size, caller);
-	real = real_malloc(asize);
-	p = hdr2ptr(real);
-	if (!ptr_is_aligned(p, alignment))
-		p = ptr_align(p, alignment);
-	h = ptr2hdr(p);
-	alloc_insert_rcu(l, h, size, real);
+	p = real = real_malloc(asize);
+	if (real) {
+		p = hdr2ptr(real);
+		if (!ptr_is_aligned(p, alignment))
+			p = ptr_align(p, alignment);
+		h = ptr2hdr(p);
+		alloc_insert_rcu(l, h, size, real);
+	}
 	rcu_read_unlock();
 
 	return p;
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 06/19] ensure ENOMEM is preserved in errno when appropriate
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (4 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 05/19] internal_memalign: do not assume real_malloc succeeds Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 07/19] memalign: check alignment on all public functions Eric Wong
                   ` (12 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

rcu_read_unlock may clobber errno, so it's our responsibility
to preserve it.  Fortunately, ENOMEM is the only error which
real_malloc can fail with.

Also, do not set errno for posix_memalign.
---
 ext/mwrap/mwrap.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index fa52631..7ae892c 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -401,10 +401,9 @@ static void *internal_memalign(size_t alignment, size_t size, uintptr_t caller)
 	for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
 		; /* double alignment until >= sizeof(struct alloc_hdr) */
 	if (__builtin_add_overflow(size, alignment, &asize) ||
-	    __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize)) {
-		errno = ENOMEM;
+	    __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
 		return 0;
-	}
+
 	/* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
 	rcu_read_lock();
 	l = update_stats_rcu(size, caller);
@@ -423,7 +422,9 @@ static void *internal_memalign(size_t alignment, size_t size, uintptr_t caller)
 
 void *memalign(size_t alignment, size_t size)
 {
-	return internal_memalign(alignment, size, RETURN_ADDRESS(0));
+	void *p = internal_memalign(alignment, size, RETURN_ADDRESS(0));
+	if (caa_unlikely(!p)) errno = ENOMEM;
+	return p;
 }
 
 static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
@@ -445,7 +446,9 @@ void cfree(void *) __attribute__((alias("free")));
 
 void *valloc(size_t size)
 {
-	return internal_memalign(page_size, size, RETURN_ADDRESS(0));
+	void *p = internal_memalign(page_size, size, RETURN_ADDRESS(0));
+	if (caa_unlikely(!p)) errno = ENOMEM;
+	return p;
 }
 
 #if __GNUC__ < 7
@@ -461,13 +464,16 @@ void *valloc(size_t size)
 void *pvalloc(size_t size)
 {
 	size_t alignment = page_size;
+	void *p;
 
 	if (add_overflow_p(size, alignment)) {
 		errno = ENOMEM;
 		return 0;
 	}
 	size = size_align(size, alignment);
-	return internal_memalign(alignment, size, RETURN_ADDRESS(0));
+	p = internal_memalign(alignment, size, RETURN_ADDRESS(0));
+	if (caa_unlikely(!p)) errno = ENOMEM;
+	return p;
 }
 
 void *malloc(size_t size)
@@ -490,6 +496,7 @@ void *malloc(size_t size)
 		p = hdr2ptr(h);
 	}
 	rcu_read_unlock();
+	if (caa_unlikely(!p)) errno = ENOMEM;
 	return p;
 }
 
@@ -518,6 +525,7 @@ void *calloc(size_t nmemb, size_t size)
 		memset(p, 0, size);
 	}
 	rcu_read_unlock();
+	if (caa_unlikely(!p)) errno = ENOMEM;
 	return p;
 }
 
@@ -552,6 +560,7 @@ void *realloc(void *ptr, size_t size)
 		memcpy(p, ptr, old->size < size ? old->size : size);
 		free(ptr);
 	}
+	if (caa_unlikely(!p)) errno = ENOMEM;
 	return p;
 }
 
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 07/19] memalign: check alignment on all public functions
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (5 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 06/19] ensure ENOMEM is preserved in errno when appropriate Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 08/19] reduce stack usage from file names Eric Wong
                   ` (11 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

And rework our internal_memalign interface to mimic the
posix_memalign API; since that's what Ruby favors and
avoids touching errno on errors.
---
 ext/mwrap/mwrap.c | 74 ++++++++++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 30 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 7ae892c..6109070 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -42,11 +42,12 @@ void *__malloc(size_t);
 void __free(void *);
 static void *(*real_malloc)(size_t) = __malloc;
 static void (*real_free)(void *) = __free;
-#  define RETURN_IF_NOT_READY() do {} while (0) /* nothing */
+static const int ready = 1;
 #else
 static int ready;
 static void *(*real_malloc)(size_t);
 static void (*real_free)(void *);
+#endif /* !FreeBSD */
 
 /*
  * we need to fake an OOM condition while dlsym is running,
@@ -60,8 +61,6 @@ static void (*real_free)(void *);
 	} \
 } while (0)
 
-#endif /* !FreeBSD */
-
 static size_t generation;
 static size_t page_size;
 static struct cds_lfht *totals;
@@ -388,57 +387,72 @@ static void *ptr_align(void *ptr, size_t alignment)
 	return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1));
 }
 
-static void *internal_memalign(size_t alignment, size_t size, uintptr_t caller)
+static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
+
+static int
+internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
 {
 	struct src_loc *l;
 	struct alloc_hdr *h;
-	void *p, *real;
+	void *real;
 	size_t asize;
+	size_t d = alignment / sizeof(void*);
+	size_t r = alignment % sizeof(void*);
 
-	RETURN_IF_NOT_READY();
-	if (alignment <= ASSUMED_MALLOC_ALIGNMENT)
-		return malloc(size);
+	if (!ready) return ENOMEM;
+
+	if (r != 0 || d == 0 || !is_power_of_two(d))
+		return EINVAL;
+
+	if (alignment <= ASSUMED_MALLOC_ALIGNMENT) {
+		void *p = malloc(size);
+		if (!p) return ENOMEM;
+		*pp = p;
+		return 0;
+	}
 	for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
 		; /* double alignment until >= sizeof(struct alloc_hdr) */
 	if (__builtin_add_overflow(size, alignment, &asize) ||
 	    __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
-		return 0;
+		return ENOMEM;
 
 	/* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
 	rcu_read_lock();
 	l = update_stats_rcu(size, caller);
-	p = real = real_malloc(asize);
+	real = real_malloc(asize);
 	if (real) {
-		p = hdr2ptr(real);
+		void *p = hdr2ptr(real);
 		if (!ptr_is_aligned(p, alignment))
 			p = ptr_align(p, alignment);
 		h = ptr2hdr(p);
 		alloc_insert_rcu(l, h, size, real);
+		*pp = p;
 	}
 	rcu_read_unlock();
 
-	return p;
+	return real ? 0 : ENOMEM;
 }
 
-void *memalign(size_t alignment, size_t size)
+static void *
+memalign_result(int err, void *p)
 {
-	void *p = internal_memalign(alignment, size, RETURN_ADDRESS(0));
-	if (caa_unlikely(!p)) errno = ENOMEM;
+	if (caa_unlikely(err)) {
+		errno = err;
+		return 0;
+	}
 	return p;
 }
 
-static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
+void *memalign(size_t alignment, size_t size)
+{
+	void *p;
+	int err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+	return memalign_result(err, p);
+}
 
 int posix_memalign(void **p, size_t alignment, size_t size)
 {
-	size_t d = alignment / sizeof(void*);
-	size_t r = alignment % sizeof(void*);
-
-	if (r != 0 || d == 0 || !is_power_of_two(d))
-		return EINVAL;
-
-	*p = internal_memalign(alignment, size, RETURN_ADDRESS(0));
-	return *p ? 0 : ENOMEM;
+	return internal_memalign(p, alignment, size, RETURN_ADDRESS(0));
 }
 
 void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
@@ -446,9 +460,9 @@ void cfree(void *) __attribute__((alias("free")));
 
 void *valloc(size_t size)
 {
-	void *p = internal_memalign(page_size, size, RETURN_ADDRESS(0));
-	if (caa_unlikely(!p)) errno = ENOMEM;
-	return p;
+	void *p;
+	int err = internal_memalign(&p, page_size, size, RETURN_ADDRESS(0));
+	return memalign_result(err, p);
 }
 
 #if __GNUC__ < 7
@@ -465,15 +479,15 @@ void *pvalloc(size_t size)
 {
 	size_t alignment = page_size;
 	void *p;
+	int err;
 
 	if (add_overflow_p(size, alignment)) {
 		errno = ENOMEM;
 		return 0;
 	}
 	size = size_align(size, alignment);
-	p = internal_memalign(alignment, size, RETURN_ADDRESS(0));
-	if (caa_unlikely(!p)) errno = ENOMEM;
-	return p;
+	err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+	return memalign_result(err, p);
 }
 
 void *malloc(size_t size)
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 08/19] reduce stack usage from file names
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (6 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 07/19] memalign: check alignment on all public functions Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 09/19] resolve real_malloc earlier for C++ programs Eric Wong
                   ` (10 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

Ruby source locations can be extremely long and use excessive
stack space.  Move it off stack for the temporary key lookup
to reduce the likelihood of stack overflows.  No need to use
TSD (which is stored on stack with GCC anyways); as we have
GVL at that point.
---
 ext/mwrap/mwrap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 6109070..d08ebf0 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -291,6 +291,7 @@ static struct src_loc *update_stats_rcu(size_t size, uintptr_t caller)
 		const char *ptr = rb_source_location_cstr(&line);
 		size_t len;
 		size_t int_size = INT2STR_MAX;
+		static char buf[PATH_MAX + INT2STR_MAX + sizeof(*k) + 2];
 
 		generation = rb_gc_count();
 
@@ -298,7 +299,7 @@ static struct src_loc *update_stats_rcu(size_t size, uintptr_t caller)
 
 		/* avoid vsnprintf or anything which could call malloc here: */
 		len = strlen(ptr);
-		k = alloca(sizeof(*k) + len + 1 + int_size + 1);
+		k = (void *)buf;
 		k->total = size;
 		dst = mempcpy(k->k, ptr, len);
 		*dst++ = ':';
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 09/19] resolve real_malloc earlier for C++ programs
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (7 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 08/19] reduce stack usage from file names Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 10/19] allow analyzing live allocations via Mwrap[location] Eric Wong
                   ` (9 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

cmake (as run by the Ruby test suite for RubyGems) uses libjson
and libtasn1, which respectively call malloc (via `new') and
free before our constructor can even fire.  Apparently, C++
variable initialization may call "new" outside of any functions;
and those run before any functions with the GCC constructor
attribute.

Disclaimer: I don't know C++
---
 ext/mwrap/mwrap.c  | 34 ++++++++++++++++++++++++----------
 test/test_mwrap.rb | 16 ++++++++++++++++
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index d08ebf0..73d5a80 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -42,12 +42,11 @@ void *__malloc(size_t);
 void __free(void *);
 static void *(*real_malloc)(size_t) = __malloc;
 static void (*real_free)(void *) = __free;
-static const int ready = 1;
 #else
-static int ready;
 static void *(*real_malloc)(size_t);
 static void (*real_free)(void *);
 #endif /* !FreeBSD */
+static int resolving_malloc;
 
 /*
  * we need to fake an OOM condition while dlsym is running,
@@ -55,7 +54,7 @@ static void (*real_free)(void *);
  * symbol for the jemalloc calloc, yet
  */
 #  define RETURN_IF_NOT_READY() do { \
-	if (!ready) { \
+	if (!real_malloc) { \
 		errno = ENOMEM; \
 		return NULL; \
 	} \
@@ -93,14 +92,16 @@ __attribute__((constructor)) static void resolve_malloc(void)
 	int err;
 
 #ifndef __FreeBSD__
-	real_malloc = dlsym(RTLD_NEXT, "malloc");
+	if (!real_malloc) {
+		resolving_malloc = 1;
+		real_malloc = dlsym(RTLD_NEXT, "malloc");
+	}
 	real_free = dlsym(RTLD_NEXT, "free");
 	if (!real_malloc || !real_free) {
 		fprintf(stderr, "missing malloc/aligned_alloc/free\n"
 			"\t%p %p\n", real_malloc, real_free);
 		_exit(1);
 	}
-	ready = 1;
 #endif
 	totals = lfht_new();
 	if (!totals)
@@ -343,6 +344,8 @@ void free(void *p)
 {
 	if (p) {
 		struct alloc_hdr *h = ptr2hdr(p);
+
+		if (!real_free) return; /* oh well, leak a little */
 		if (h->as.live.loc) {
 			h->size = 0;
 			mutex_lock(h->as.live.loc->mtx);
@@ -400,7 +403,7 @@ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
 	size_t d = alignment / sizeof(void*);
 	size_t r = alignment % sizeof(void*);
 
-	if (!ready) return ENOMEM;
+	if (!real_malloc) return ENOMEM;
 
 	if (r != 0 || d == 0 || !is_power_of_two(d))
 		return EINVAL;
@@ -498,11 +501,19 @@ void *malloc(size_t size)
 	size_t asize;
 	void *p;
 
-	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
-		errno = ENOMEM;
-		return 0;
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
+		goto enomem;
+
+	/*
+	 * Needed for C++ global declarations using "new",
+	 * which happens before our constructor
+	 */
+	if (!real_malloc) {
+		if (resolving_malloc) goto enomem;
+		resolving_malloc = 1;
+		real_malloc = dlsym(RTLD_NEXT, "malloc");
 	}
-	RETURN_IF_NOT_READY();
+
 	rcu_read_lock();
 	l = update_stats_rcu(size, RETURN_ADDRESS(0));
 	p = h = real_malloc(asize);
@@ -513,6 +524,9 @@ void *malloc(size_t size)
 	rcu_read_unlock();
 	if (caa_unlikely(!p)) errno = ENOMEM;
 	return p;
+enomem:
+	errno = ENOMEM;
+	return 0;
 }
 
 void *calloc(size_t nmemb, size_t size)
diff --git a/test/test_mwrap.rb b/test/test_mwrap.rb
index d76e4da..d0af0f7 100644
--- a/test/test_mwrap.rb
+++ b/test/test_mwrap.rb
@@ -61,6 +61,22 @@ class TestMwrap < Test::Unit::TestCase
     end
   end
 
+  def test_cmake
+    begin
+      exp = `cmake -h`
+    rescue Errno::ENOENT
+      warn 'cmake missing'
+      return
+    end
+    assert_not_predicate exp.strip, :empty?
+    env = @@env.merge('MWRAP' => 'dump_fd:1')
+    out = IO.popen(env, %w(cmake -h), &:read)
+    assert out.start_with?(exp), 'original help exists'
+    assert_not_equal exp, out, 'includes dump output'
+    dump = out.delete_prefix(exp)
+    assert_match(/\b0x[a-f0-9]+\b/s, dump, 'dump output has addresses')
+  end
+
   def test_clear
     cmd = @@cmd + %w(
       -e ("0"*10000).clear
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 10/19] allow analyzing live allocations via Mwrap[location]
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (8 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 09/19] resolve real_malloc earlier for C++ programs Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 11/19] alias Mwrap.clear to Mwrap.reset Eric Wong
                   ` (8 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

This can be useful in apps to analyze what's live and
what's not.
---
 ext/mwrap/mwrap.c  | 124 +++++++++++++++++++++++++++++++++++++++++++--
 test/test_mwrap.rb |  28 ++++++++++
 2 files changed, 147 insertions(+), 5 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 73d5a80..1216c44 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -28,6 +28,7 @@ extern int __attribute__((weak)) ruby_thread_has_gvl_p(void);
 extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
 extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
 extern size_t __attribute__((weak)) rb_gc_count(void);
+extern VALUE __attribute__((weak)) rb_cObject;
 
 /* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */
 #define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2)
@@ -209,6 +210,8 @@ struct alloc_hdr {
 	size_t size;
 };
 
+static char kbuf[PATH_MAX + INT2STR_MAX + sizeof(struct alloc_hdr) + 2];
+
 static struct alloc_hdr *ptr2hdr(void *p)
 {
 	return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
@@ -292,7 +295,6 @@ static struct src_loc *update_stats_rcu(size_t size, uintptr_t caller)
 		const char *ptr = rb_source_location_cstr(&line);
 		size_t len;
 		size_t int_size = INT2STR_MAX;
-		static char buf[PATH_MAX + INT2STR_MAX + sizeof(*k) + 2];
 
 		generation = rb_gc_count();
 
@@ -300,7 +302,7 @@ static struct src_loc *update_stats_rcu(size_t size, uintptr_t caller)
 
 		/* avoid vsnprintf or anything which could call malloc here: */
 		len = strlen(ptr);
-		k = (void *)buf;
+		k = (void *)kbuf;
 		k->total = size;
 		dst = mempcpy(k->k, ptr, len);
 		*dst++ = ':';
@@ -347,7 +349,7 @@ void free(void *p)
 
 		if (!real_free) return; /* oh well, leak a little */
 		if (h->as.live.loc) {
-			h->size = 0;
+			uatomic_set(&h->size, 0);
 			mutex_lock(h->as.live.loc->mtx);
 			cds_list_del_rcu(&h->anode);
 			mutex_unlock(h->as.live.loc->mtx);
@@ -739,7 +741,7 @@ static VALUE mwrap_reset(VALUE mod)
 	return Qnil;
 }
 
-static VALUE dump_ensure(VALUE ignored)
+static VALUE rcu_unlock_ensure(VALUE ignored)
 {
 	rcu_read_unlock();
 	--locating;
@@ -801,7 +803,116 @@ static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
 	++locating;
 	rcu_read_lock();
 
-	return rb_ensure(dump_each_rcu, (VALUE)&a, dump_ensure, 0);
+	return rb_ensure(dump_each_rcu, (VALUE)&a, rcu_unlock_ensure, 0);
+}
+
+static size_t
+src_loc_memsize(const void *p)
+{
+	return sizeof(struct src_loc);
+}
+
+static const rb_data_type_t src_loc_type = {
+	"source_location",
+	/* no marking, no freeing */
+	{ 0, 0, src_loc_memsize, /* reserved */ },
+	/* parent, data, [ flags ] */
+};
+
+static VALUE cSrcLoc;
+
+/*
+ * call-seq:
+ *	Mwrap[location] -> Mwrap::SourceLocation
+ *
+ * Returns the associated Mwrap::SourceLocation given the +location+
+ * String.  +location+ is either a Ruby source location path:line
+ * (e.g. "/path/to/foo.rb:5") or a hexadecimal memory address with
+ * square-braces part yielded by Mwrap.dump (e.g. "[0xdeadbeef]")
+ */
+static VALUE mwrap_aref(VALUE mod, VALUE loc)
+{
+	const char *str = StringValueCStr(loc);
+	long len = RSTRING_LEN(loc);
+	struct src_loc *k = 0;
+	uintptr_t p;
+	struct cds_lfht_iter iter;
+	struct cds_lfht_node *cur;
+	struct cds_lfht *t;
+	struct src_loc *l;
+	VALUE val = Qnil;
+	const char *c;
+
+	if ((c = memchr(str, '[', len)) && sscanf(c, "[%p]", (void **)&p)) {
+		k = (void *)kbuf;
+		memcpy(k->k, &p, sizeof(p));
+		k->capa = 0;
+		k->hval = jhash(k->k, sizeof(p), 0xdeadbeef);
+	} else {
+		k = (void *)kbuf;
+		memcpy(k->k, str, len + 1);
+		k->capa = len + 1;
+		k->hval = jhash(k->k, k->capa, 0xdeadbeef);
+	}
+
+	if (!k) return val;
+
+	rcu_read_lock();
+	t = rcu_dereference(totals);
+	if (!t) goto out_unlock;
+
+	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+	cur = cds_lfht_iter_get_node(&iter);
+	if (cur) {
+		l = caa_container_of(cur, struct src_loc, hnode);
+		val = TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, l);
+	}
+out_unlock:
+	rcu_read_unlock();
+	return val;
+}
+
+static VALUE src_loc_each_i(VALUE p)
+{
+	struct alloc_hdr *h;
+	struct src_loc *l = (struct src_loc *)p;
+
+	cds_list_for_each_entry_rcu(h, &l->allocs, anode) {
+		size_t gen = uatomic_read(&h->as.live.gen);
+		size_t size = uatomic_read(&h->size);
+
+		if (size) {
+			VALUE v[2];
+			v[0] = SIZET2NUM(size);
+			v[1] = SIZET2NUM(gen);
+
+			rb_yield_values2(2, v);
+		}
+	}
+
+	return Qfalse;
+}
+
+/*
+ * call-seq:
+ *	loc = Mwrap[location]
+ *	loc.each { |size,generation| ... }
+ *
+ * Iterates through live allocations for a given Mwrap::SourceLocation,
+ * yielding the +size+ (in bytes) and +generation+ of each allocation.
+ * The +generation+ is the value of the GC.count method at the time
+ * the allocation was made.
+ */
+static VALUE src_loc_each(VALUE self)
+{
+	struct src_loc *l;
+	TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l);
+
+	assert(locating == 0 && "forgot to clear locating");
+	++locating;
+	rcu_read_lock();
+	rb_ensure(src_loc_each_i, (VALUE)l, rcu_unlock_ensure, 0);
+	return self;
 }
 
 /*
@@ -831,10 +942,13 @@ void Init_mwrap(void)
 	VALUE mod = rb_define_module("Mwrap");
 	id_uminus = rb_intern("-@");
 
+	cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject);
 	rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
 	rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
 	rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
 	rb_define_singleton_method(mod, "each", mwrap_each, -1);
+	rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
+	rb_define_method(cSrcLoc, "each", src_loc_each, 0);
 }
 
 /* rb_cloexec_open isn't usable by non-Ruby processes */
diff --git a/test/test_mwrap.rb b/test/test_mwrap.rb
index d0af0f7..686d87d 100644
--- a/test/test_mwrap.rb
+++ b/test/test_mwrap.rb
@@ -163,6 +163,34 @@ class TestMwrap < Test::Unit::TestCase
     end
   end
 
+  def test_aref_each
+    cmd = @@cmd + %w(
+      -e count=GC.count
+      -e GC.disable
+      -e keep=("0"*10000)
+      -e loc=Mwrap["-e:3"]
+      -e loc.each{|size,gen|p([size,gen,count])}
+    )
+    buf = IO.popen(@@env, cmd, &:read)
+    assert_predicate $?, :success?
+    assert_match(/\A\[\s*\d+,\s*\d+,\s*\d+\]\s*\z/s, buf)
+    size, gen, count = eval(buf)
+    assert_operator size, :>=, 10000
+    assert_operator gen, :>=, count
+
+    cmd = @@cmd + %w(
+      -e count=GC.count
+      -e locs=""
+      -e Mwrap.each(1){|loc,tot,calls|locs<<loc}
+      -e m=locs.match(/(\[0x[a-f0-9]+\])/i)
+      -e p(loc=Mwrap["bobloblaw\t#{m[1]}"])
+      -e loc.each{|size,gen|p([size,gen,count])}
+    )
+    buf = IO.popen(@@env, cmd, &:read)
+    assert_predicate $?, :success?
+    assert_match(/\bMwrap::SourceLocation\b/, buf)
+  end
+
   def test_benchmark
     cmd = @@cmd + %w(-rbenchmark
       -e puts(Benchmark.measure{1000000.times{Time.now}}))
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 11/19] alias Mwrap.clear to Mwrap.reset
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (9 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 10/19] allow analyzing live allocations via Mwrap[location] Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 12/19] implement accessors for SourceLocation Eric Wong
                   ` (7 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

It is not safe to use the new Mwrap[] method with Mwrap.clear,
and the rcu_head overhead in src_loc structs was not worth it.
---
 ext/mwrap/mwrap.c | 56 ++++++++---------------------------------------
 1 file changed, 9 insertions(+), 47 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 1216c44..729e3a8 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -185,7 +185,6 @@ static int has_ec_p(void)
 
 /* allocated via real_malloc/real_free */
 struct src_loc {
-	struct rcu_head rcu_head;
 	pthread_mutex_t *mtx;
 	size_t calls;
 	size_t total;
@@ -667,49 +666,6 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
 	return Qnil;
 }
 
-static void
-free_src_loc(struct rcu_head *head)
-{
-	struct src_loc *l = caa_container_of(head, struct src_loc, rcu_head);
-	real_free(l);
-}
-
-static void *totals_clear(void *ign)
-{
-	struct cds_lfht *new, *old;
-	struct cds_lfht_iter iter;
-	struct src_loc *l;
-
-	new = lfht_new();
-	rcu_read_lock();
-	old = rcu_dereference(totals);
-	rcu_assign_pointer(totals, new);
-	cds_lfht_for_each_entry(old, &iter, l, hnode) {
-		cds_lfht_del(old, &l->hnode);
-		call_rcu(&l->rcu_head, free_src_loc);
-	}
-	rcu_read_unlock();
-
-	synchronize_rcu(); /* ensure totals points to new */
-	cds_lfht_destroy(old, NULL);
-	return 0;
-}
-
-/*
- * call-seq:
- *
- *	Mwrap.clear -> nil
- *
- * Atomically replaces the totals table and destroys the old one.
- * This resets all statistics. It is more expensive than `Mwrap.reset'
- * as new allocations will need to be made to repopulate the new table.
- */
-static VALUE mwrap_clear(VALUE mod)
-{
-	rb_thread_call_without_gvl(totals_clear, 0, 0, 0);
-	return Qnil;
-}
-
 static void *totals_reset(void *ign)
 {
 	struct cds_lfht *t;
@@ -732,8 +688,8 @@ static void *totals_reset(void *ign)
  *	Mwrap.reset -> nil
  *
  * Resets the the total tables by zero-ing all counters.
- * This resets all statistics and is less costly than `Mwrap.clear'
- * but is not an atomic operation.
+ * This resets all statistics.  This is not an atomic operation
+ * as other threads (outside of GVL) may increment counters.
  */
 static VALUE mwrap_reset(VALUE mod)
 {
@@ -741,6 +697,12 @@ static VALUE mwrap_reset(VALUE mod)
 	return Qnil;
 }
 
+/* :nodoc: */
+static VALUE mwrap_clear(VALUE mod)
+{
+	return mwrap_reset(mod);
+}
+
 static VALUE rcu_unlock_ensure(VALUE ignored)
 {
 	rcu_read_unlock();
@@ -944,8 +906,8 @@ void Init_mwrap(void)
 
 	cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject);
 	rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
-	rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
 	rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
+	rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
 	rb_define_singleton_method(mod, "each", mwrap_each, -1);
 	rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
 	rb_define_method(cSrcLoc, "each", src_loc_each, 0);
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 12/19] implement accessors for SourceLocation
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (10 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 11/19] alias Mwrap.clear to Mwrap.reset Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 13/19] mwrap_aref: quiet -Wshorten-64-to-32 warning Eric Wong
                   ` (6 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

Knowing the average/max lifespan (in terms of GC.count) of
allocations can be useful.
---
 ext/mwrap/mwrap.c  | 146 ++++++++++++++++++++++++++++++++++++---------
 test/test_mwrap.rb |  48 +++++++++++++++
 2 files changed, 165 insertions(+), 29 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 729e3a8..7455c54 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -186,8 +186,11 @@ static int has_ec_p(void)
 /* allocated via real_malloc/real_free */
 struct src_loc {
 	pthread_mutex_t *mtx;
-	size_t calls;
 	size_t total;
+	size_t allocations;
+	size_t frees;
+	size_t age_total; /* (age_total / frees) => mean age at free */
+	size_t max_lifespan;
 	struct cds_lfht_node hnode;
 	struct cds_list_head allocs; /* <=> alloc_hdr.node */
 	uint32_t hval;
@@ -258,14 +261,17 @@ again:
 	if (cur) {
 		l = caa_container_of(cur, struct src_loc, hnode);
 		uatomic_add(&l->total, k->total);
-		uatomic_add(&l->calls, 1);
+		uatomic_add(&l->allocations, 1);
 	} else {
 		size_t n = loc_size(k);
 		l = real_malloc(sizeof(*l) + n);
 		if (!l) goto out_unlock;
 		memcpy(l, k, sizeof(*l) + n);
 		l->mtx = mutex_assign();
-		l->calls = 1;
+		l->age_total = 0;
+		l->max_lifespan = 0;
+		l->frees = 0;
+		l->allocations = 1;
 		CDS_INIT_LIST_HEAD(&l->allocs);
 		cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
 		if (cur != &l->hnode) { /* lost race */
@@ -345,13 +351,22 @@ void free(void *p)
 {
 	if (p) {
 		struct alloc_hdr *h = ptr2hdr(p);
+		struct src_loc *l = h->as.live.loc;
 
 		if (!real_free) return; /* oh well, leak a little */
-		if (h->as.live.loc) {
+		if (l) {
+			size_t age = generation - h->as.live.gen;
+
 			uatomic_set(&h->size, 0);
-			mutex_lock(h->as.live.loc->mtx);
+			uatomic_add(&l->frees, 1);
+			uatomic_add(&l->age_total, age);
+
+			mutex_lock(l->mtx);
 			cds_list_del_rcu(&h->anode);
-			mutex_unlock(h->as.live.loc->mtx);
+			if (age > l->max_lifespan)
+				l->max_lifespan = age;
+			mutex_unlock(l->mtx);
+
 			call_rcu(&h->as.dead, free_hdr_rcu);
 		}
 		else {
@@ -621,7 +636,7 @@ static void *dump_to_file(void *x)
 			p = s[0];
 		}
 		fprintf(a->fp, "%16zu %12zu %s\n",
-			l->total, l->calls, (const char *)p);
+			l->total, l->allocations, (const char *)p);
 		if (s) free(s);
 	}
 out_unlock:
@@ -676,7 +691,10 @@ static void *totals_reset(void *ign)
 	t = rcu_dereference(totals);
 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
 		uatomic_set(&l->total, 0);
-		uatomic_set(&l->calls, 0);
+		uatomic_set(&l->allocations, 0);
+		uatomic_set(&l->frees, 0);
+		uatomic_set(&l->age_total, 0);
+		uatomic_set(&l->max_lifespan, 0);
 	}
 	rcu_read_unlock();
 	return 0;
@@ -710,6 +728,27 @@ static VALUE rcu_unlock_ensure(VALUE ignored)
 	return Qfalse;
 }
 
+static VALUE location_string(struct src_loc *l)
+{
+	VALUE ret, tmp;
+
+	if (loc_is_addr(l)) {
+		char **s = backtrace_symbols((void *)l->k, 1);
+		tmp = rb_str_new_cstr(s[0]);
+		free(s);
+	}
+	else {
+		tmp = rb_str_new(l->k, l->capa - 1);
+	}
+
+	/* deduplicate and try to free up some memory */
+	ret = rb_funcall(tmp, id_uminus, 0);
+	if (!OBJ_FROZEN_RAW(tmp))
+		rb_str_resize(tmp, 0);
+
+	return ret;
+}
+
 static VALUE dump_each_rcu(VALUE x)
 {
 	struct dump_arg *a = (struct dump_arg *)x;
@@ -719,27 +758,17 @@ static VALUE dump_each_rcu(VALUE x)
 
 	t = rcu_dereference(totals);
 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
-		VALUE v[3];
+		VALUE v[6];
 		if (l->total <= a->min) continue;
 
-		if (loc_is_addr(l)) {
-			char **s = backtrace_symbols((void *)l->k, 1);
-			v[1] = rb_str_new_cstr(s[0]);
-			free(s);
-		}
-		else {
-			v[1] = rb_str_new(l->k, l->capa - 1);
-		}
-
-		/* deduplicate and try to free up some memory */
-		v[0] = rb_funcall(v[1], id_uminus, 0);
-		if (!OBJ_FROZEN_RAW(v[1]))
-			rb_str_resize(v[1], 0);
-
+		v[0] = location_string(l);
 		v[1] = SIZET2NUM(l->total);
-		v[2] = SIZET2NUM(l->calls);
+		v[2] = SIZET2NUM(l->allocations);
+		v[3] = SIZET2NUM(l->frees);
+		v[4] = SIZET2NUM(l->age_total);
+		v[5] = SIZET2NUM(l->max_lifespan);
 
-		rb_yield_values2(3, v);
+		rb_yield_values2(6, v);
 		assert(rcu_read_ongoing());
 	}
 	return Qnil;
@@ -748,10 +777,12 @@ static VALUE dump_each_rcu(VALUE x)
 /*
  * call-seq:
  *
- * 	Mwrap.each([min]) { |location,total_bytes,call_count| ... }
+ *	Mwrap.each([min]) do |location,total,allocations,frees,age_total,max_lifespan|
+ *	  ...
+ *	end
  *
  * Yields each entry of the of the table to a caller-supplied block.
- * +min+ may be specified to filter out lines with +total_bytes+
+ * +min+ may be specified to filter out lines with +total+ bytes
  * equal-to-or-smaller-than the supplied minimum.
  */
 static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
@@ -855,6 +886,14 @@ static VALUE src_loc_each_i(VALUE p)
 	return Qfalse;
 }
 
+static struct src_loc *src_loc_get(VALUE self)
+{
+	struct src_loc *l;
+	TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l);
+	assert(l);
+	return l;
+}
+
 /*
  * call-seq:
  *	loc = Mwrap[location]
@@ -867,8 +906,7 @@ static VALUE src_loc_each_i(VALUE p)
  */
 static VALUE src_loc_each(VALUE self)
 {
-	struct src_loc *l;
-	TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l);
+	struct src_loc *l = src_loc_get(self);
 
 	assert(locating == 0 && "forgot to clear locating");
 	++locating;
@@ -877,6 +915,50 @@ static VALUE src_loc_each(VALUE self)
 	return self;
 }
 
+static VALUE src_loc_mean_lifespan(VALUE self)
+{
+	struct src_loc *l = src_loc_get(self);
+	size_t tot, frees;
+
+	frees = uatomic_read(&l->frees);
+	tot = uatomic_read(&l->age_total);
+	return DBL2NUM(frees ? ((double)tot/(double)frees) : HUGE_VAL);
+}
+
+static VALUE src_loc_frees(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->frees));
+}
+
+static VALUE src_loc_allocations(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->allocations));
+}
+
+static VALUE src_loc_total(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->total));
+}
+
+static VALUE src_loc_max_lifespan(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->max_lifespan));
+}
+
+/*
+ * Returns a frozen String location of the given SourceLocation object.
+ */
+static VALUE src_loc_name(VALUE self)
+{
+	struct src_loc *l = src_loc_get(self);
+	VALUE ret;
+
+	++locating;
+	ret = location_string(l);
+	--locating;
+	return ret;
+}
+
 /*
  * Document-module: Mwrap
  *
@@ -911,6 +993,12 @@ void Init_mwrap(void)
 	rb_define_singleton_method(mod, "each", mwrap_each, -1);
 	rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
 	rb_define_method(cSrcLoc, "each", src_loc_each, 0);
+	rb_define_method(cSrcLoc, "frees", src_loc_frees, 0);
+	rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0);
+	rb_define_method(cSrcLoc, "total", src_loc_total, 0);
+	rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0);
+	rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0);
+	rb_define_method(cSrcLoc, "name", src_loc_name, 0);
 }
 
 /* rb_cloexec_open isn't usable by non-Ruby processes */
diff --git a/test/test_mwrap.rb b/test/test_mwrap.rb
index 686d87d..2234f7d 100644
--- a/test/test_mwrap.rb
+++ b/test/test_mwrap.rb
@@ -203,4 +203,52 @@ class TestMwrap < Test::Unit::TestCase
   def test_mwrap_dump_check
     assert_raise(TypeError) { Mwrap.dump(:bogus) }
   end
+
+  def assert_separately(src, *opts)
+    Tempfile.create(%w(mwrap .rb)) do |tmp|
+      tmp.write(src.lstrip!)
+      tmp.flush
+      assert(system(@@env, *@@cmd, tmp.path, *opts))
+    end
+  end
+
+  def test_source_location
+    assert_separately(+"#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
+      require 'mwrap'
+      foo = '0' * 10000
+      k = -"#{__FILE__}:2"
+      loc = Mwrap[k]
+      loc.name == k or abort 'SourceLocation#name broken'
+      loc.total >= 10000 or abort 'SourceLocation#total broken'
+      loc.frees == 0 or abort 'SourceLocation#frees broken'
+      loc.allocations == 1 or abort 'SourceLocation#allocations broken'
+      seen = false
+      loc.each do |*x| seen = x end
+      seen[1] == loc.total or 'SourceLocation#each broken'
+      foo.clear
+
+      # wait for call_rcu to perform real_free
+      freed = false
+      until freed
+        freed = true
+        loc.each do freed = false end
+      end
+      loc.frees == 1 or abort 'SourceLocation#frees broken (after free)'
+      Float === loc.mean_lifespan or abort 'mean_lifespan broken'
+      Integer === loc.max_lifespan or abort 'max_lifespan broken'
+
+      addr = false
+      Mwrap.each do |a,|
+        if a =~ /\[0x[a-f0-9]+\]/
+          addr = a
+          break
+        end
+      end
+      addr.frozen? or abort 'Mwrap.each returned unfrozen address'
+      loc = Mwrap[addr] or abort "Mwrap[#{addr}] broken"
+      addr == loc.name or abort 'SourceLocation#name works on address'
+      loc.name.frozen? or abort 'SourceLocation#name not frozen'
+    end;
+  end
 end
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 13/19] mwrap_aref: quiet -Wshorten-64-to-32 warning
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (11 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 12/19] implement accessors for SourceLocation Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 14/19] fixes for FreeBSD 11.1 Eric Wong
                   ` (5 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

An "int" length is enough for the source location length, as
it's not going to exceed PATH_MAX by much.
---
 ext/mwrap/mwrap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 7455c54..2b946b1 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -826,7 +826,7 @@ static VALUE cSrcLoc;
 static VALUE mwrap_aref(VALUE mod, VALUE loc)
 {
 	const char *str = StringValueCStr(loc);
-	long len = RSTRING_LEN(loc);
+	int len = RSTRING_LENINT(loc);
 	struct src_loc *k = 0;
 	uintptr_t p;
 	struct cds_lfht_iter iter;
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 14/19] fixes for FreeBSD 11.1...
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (12 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 13/19] mwrap_aref: quiet -Wshorten-64-to-32 warning Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 15/19] use memrchr to extract address under glibc Eric Wong
                   ` (4 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

PTHREAD_MUTEX_INITIALIZER is incomplete, in that it defers lazy
initialization until pthread_mutex_lock is called (which in turn
calls calloc).  So we call pthread_mutex_init explicitly, as well
as calling rcu_read_lock during the constructor to initialize
mutexes used by urcu-bp.
---
 ext/mwrap/mwrap.c  | 78 ++++++++++++++++++++++++++++++++--------------
 test/test_mwrap.rb |  5 +--
 2 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 2b946b1..421c086 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -41,13 +41,13 @@ int __attribute__((weak)) ruby_thread_has_gvl_p(void)
 #ifdef __FreeBSD__
 void *__malloc(size_t);
 void __free(void *);
-static void *(*real_malloc)(size_t) = __malloc;
-static void (*real_free)(void *) = __free;
+#  define real_malloc __malloc
+#  define real_free __free
 #else
 static void *(*real_malloc)(size_t);
 static void (*real_free)(void *);
-#endif /* !FreeBSD */
 static int resolving_malloc;
+#endif /* !FreeBSD */
 
 /*
  * we need to fake an OOM condition while dlsym is running,
@@ -92,7 +92,26 @@ __attribute__((constructor)) static void resolve_malloc(void)
 {
 	int err;
 
-#ifndef __FreeBSD__
+#ifdef __FreeBSD__
+	/*
+	 * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
+	 * which happens at pthread_mutex_lock, and that calls calloc
+	 */
+	{
+		size_t i;
+
+		for (i = 0; i < MUTEX_NR; i++) {
+			err = pthread_mutex_init(&mutexes[i].mtx, 0);
+			if (err) {
+				fprintf(stderr, "error: %s\n", strerror(err));
+				_exit(1);
+			}
+		}
+		/* initialize mutexes used by urcu-bp */
+		rcu_read_lock();
+		rcu_read_unlock();
+	}
+#else /* !FreeBSD (tested on GNU/Linux) */
 	if (!real_malloc) {
 		resolving_malloc = 1;
 		real_malloc = dlsym(RTLD_NEXT, "malloc");
@@ -103,7 +122,7 @@ __attribute__((constructor)) static void resolve_malloc(void)
 			"\t%p %p\n", real_malloc, real_free);
 		_exit(1);
 	}
-#endif
+#endif /* !FreeBSD */
 	totals = lfht_new();
 	if (!totals)
 		fprintf(stderr, "failed to allocate totals table\n");
@@ -285,16 +304,21 @@ out_unlock:
 	return l;
 }
 
-static struct src_loc *update_stats_rcu(size_t size, uintptr_t caller)
+static void update_stats_rcu_unlock(const struct src_loc *l)
+{
+	if (caa_likely(l)) rcu_read_unlock();
+}
+
+static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller)
 {
 	struct src_loc *k, *ret = 0;
 	static const size_t xlen = sizeof(caller);
 	char *dst;
 
-	assert(rcu_read_ongoing());
-
+	if (caa_unlikely(!totals)) return 0;
 	if (locating++) goto out; /* do not recurse into another *alloc */
 
+	rcu_read_lock();
 	if (has_ec_p()) {
 		int line;
 		const char *ptr = rb_source_location_cstr(&line);
@@ -379,7 +403,6 @@ static void
 alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
 {
 	/* we need src_loc to remain alive for the duration of this call */
-	assert(rcu_read_ongoing());
 	if (!h) return;
 	h->size = size;
 	h->real = real;
@@ -437,8 +460,7 @@ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
 		return ENOMEM;
 
 	/* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
-	rcu_read_lock();
-	l = update_stats_rcu(size, caller);
+	l = update_stats_rcu_lock(size, caller);
 	real = real_malloc(asize);
 	if (real) {
 		void *p = hdr2ptr(real);
@@ -448,7 +470,7 @@ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
 		alloc_insert_rcu(l, h, size, real);
 		*pp = p;
 	}
-	rcu_read_unlock();
+	update_stats_rcu_unlock(l);
 
 	return real ? 0 : ENOMEM;
 }
@@ -524,20 +546,20 @@ void *malloc(size_t size)
 	 * Needed for C++ global declarations using "new",
 	 * which happens before our constructor
 	 */
+#ifndef __FreeBSD__
 	if (!real_malloc) {
 		if (resolving_malloc) goto enomem;
 		resolving_malloc = 1;
 		real_malloc = dlsym(RTLD_NEXT, "malloc");
 	}
-
-	rcu_read_lock();
-	l = update_stats_rcu(size, RETURN_ADDRESS(0));
+#endif
+	l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
 	p = h = real_malloc(asize);
 	if (h) {
 		alloc_insert_rcu(l, h, size, h);
 		p = hdr2ptr(h);
 	}
-	rcu_read_unlock();
+	update_stats_rcu_unlock(l);
 	if (caa_unlikely(!p)) errno = ENOMEM;
 	return p;
 enomem:
@@ -561,15 +583,14 @@ void *calloc(size_t nmemb, size_t size)
 		return 0;
 	}
 	RETURN_IF_NOT_READY();
-	rcu_read_lock();
-	l = update_stats_rcu(size, RETURN_ADDRESS(0));
+	l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
 	p = h = real_malloc(asize);
 	if (p) {
 		alloc_insert_rcu(l, h, size, h);
 		p = hdr2ptr(h);
 		memset(p, 0, size);
 	}
-	rcu_read_unlock();
+	update_stats_rcu_unlock(l);
 	if (caa_unlikely(!p)) errno = ENOMEM;
 	return p;
 }
@@ -591,14 +612,13 @@ void *realloc(void *ptr, size_t size)
 	}
 	RETURN_IF_NOT_READY();
 
-	rcu_read_lock();
-	l = update_stats_rcu(size, RETURN_ADDRESS(0));
+	l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
 	p = h = real_malloc(asize);
 	if (p) {
 		alloc_insert_rcu(l, h, size, h);
 		p = hdr2ptr(h);
 	}
-	rcu_read_unlock();
+	update_stats_rcu_unlock(l);
 
 	if (ptr && p) {
 		struct alloc_hdr *old = ptr2hdr(ptr);
@@ -814,6 +834,17 @@ static const rb_data_type_t src_loc_type = {
 
 static VALUE cSrcLoc;
 
+static int
+extract_addr(const char *str, size_t len, void **p)
+{
+	const char *c;
+#if defined(__GLIBC__)
+	return ((c = memchr(str, '[', len)) && sscanf(c, "[%p]", p));
+#else /* tested FreeBSD */
+	return ((c = strstr(str, "0x")) && sscanf(c, "%p", p));
+#endif
+}
+
 /*
  * call-seq:
  *	Mwrap[location] -> Mwrap::SourceLocation
@@ -834,9 +865,8 @@ static VALUE mwrap_aref(VALUE mod, VALUE loc)
 	struct cds_lfht *t;
 	struct src_loc *l;
 	VALUE val = Qnil;
-	const char *c;
 
-	if ((c = memchr(str, '[', len)) && sscanf(c, "[%p]", (void **)&p)) {
+	if (extract_addr(str, len, (void **)&p)) {
 		k = (void *)kbuf;
 		memcpy(k->k, &p, sizeof(p));
 		k->capa = 0;
diff --git a/test/test_mwrap.rb b/test/test_mwrap.rb
index 2234f7d..bc8694e 100644
--- a/test/test_mwrap.rb
+++ b/test/test_mwrap.rb
@@ -183,6 +183,7 @@ class TestMwrap < Test::Unit::TestCase
       -e locs=""
       -e Mwrap.each(1){|loc,tot,calls|locs<<loc}
       -e m=locs.match(/(\[0x[a-f0-9]+\])/i)
+      -e m||=locs.match(/\b(0x[a-f0-9]+)\b/i)
       -e p(loc=Mwrap["bobloblaw\t#{m[1]}"])
       -e loc.each{|size,gen|p([size,gen,count])}
     )
@@ -240,12 +241,12 @@ class TestMwrap < Test::Unit::TestCase
 
       addr = false
       Mwrap.each do |a,|
-        if a =~ /\[0x[a-f0-9]+\]/
+        if a =~ /0x[a-f0-9]+/
           addr = a
           break
         end
       end
-      addr.frozen? or abort 'Mwrap.each returned unfrozen address'
+      addr && addr.frozen? or abort 'Mwrap.each returned unfrozen address'
       loc = Mwrap[addr] or abort "Mwrap[#{addr}] broken"
       addr == loc.name or abort 'SourceLocation#name works on address'
       loc.name.frozen? or abort 'SourceLocation#name not frozen'
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 15/19] use memrchr to extract address under glibc
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (13 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 14/19] fixes for FreeBSD 11.1 Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 16/19] do not track allocations for constructor and Init_ Eric Wong
                   ` (3 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

glibc will place the (virtual) address at the end, and this allows us to
find addresses for programs which put '[' in their progname (e.g. ssh,
mogilefsd, unicorn, avahi, ...)
---
 ext/mwrap/mwrap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 421c086..0a1f9b5 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -839,7 +839,7 @@ extract_addr(const char *str, size_t len, void **p)
 {
 	const char *c;
 #if defined(__GLIBC__)
-	return ((c = memchr(str, '[', len)) && sscanf(c, "[%p]", p));
+	return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p));
 #else /* tested FreeBSD */
 	return ((c = strstr(str, "0x")) && sscanf(c, "%p", p));
 #endif
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 16/19] do not track allocations for constructor and Init_
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (14 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 15/19] use memrchr to extract address under glibc Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 17/19] disable memalign tracking by default Eric Wong
                   ` (2 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

There's nothing a user can do about allocations which happen
in our constructor (aside from not using mwrap), so do not
track them.
---
 ext/mwrap/mwrap.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 0a1f9b5..627bb63 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -61,6 +61,7 @@ static int resolving_malloc;
 	} \
 } while (0)
 
+static __thread size_t locating;
 static size_t generation;
 static size_t page_size;
 static struct cds_lfht *totals;
@@ -91,6 +92,7 @@ lfht_new(void)
 __attribute__((constructor)) static void resolve_malloc(void)
 {
 	int err;
+	++locating;
 
 #ifdef __FreeBSD__
 	/*
@@ -133,6 +135,7 @@ __attribute__((constructor)) static void resolve_malloc(void)
 	if (err)
 		fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
 	page_size = sysconf(_SC_PAGESIZE);
+	--locating;
 }
 
 static void
@@ -162,8 +165,6 @@ my_mempcpy(void *dest, const void *src, size_t n)
 #define RETURN_ADDRESS(nr) \
   (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
 
-static __thread size_t locating;
-
 #define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19)
 static char *int2str(int num, char *dst, size_t * size)
 {
@@ -1013,7 +1014,10 @@ static VALUE src_loc_name(VALUE self)
  */
 void Init_mwrap(void)
 {
-	VALUE mod = rb_define_module("Mwrap");
+	VALUE mod;
+
+	++locating;
+        mod = rb_define_module("Mwrap");
 	id_uminus = rb_intern("-@");
 
 	cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject);
@@ -1029,6 +1033,7 @@ void Init_mwrap(void)
 	rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0);
 	rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0);
 	rb_define_method(cSrcLoc, "name", src_loc_name, 0);
+	--locating;
 }
 
 /* rb_cloexec_open isn't usable by non-Ruby processes */
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 17/19] disable memalign tracking by default
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (15 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 16/19] do not track allocations for constructor and Init_ Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 18/19] support Mwrap.quiet to temporarily disable allocation tracking Eric Wong
  2018-07-16 21:19 ` [PATCH 19/19] mwrap_rack: Rack app to track live allocations Eric Wong
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

Users will still pay the cost in memory overhead, but this
gets rid of misleading statistics.  Tracking actual Ruby
object allocations is better left to Ruby itself.
---
 ext/mwrap/mwrap.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 627bb63..2752598 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -23,6 +23,7 @@
 #include "jhash.h"
 
 static ID id_uminus;
+static unsigned int track_memalign;
 const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
 extern int __attribute__((weak)) ruby_thread_has_gvl_p(void);
 extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
@@ -92,6 +93,7 @@ lfht_new(void)
 __attribute__((constructor)) static void resolve_malloc(void)
 {
 	int err;
+	const char *opt;
 	++locating;
 
 #ifdef __FreeBSD__
@@ -135,6 +137,11 @@ __attribute__((constructor)) static void resolve_malloc(void)
 	if (err)
 		fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
 	page_size = sysconf(_SC_PAGESIZE);
+	opt = getenv("MWRAP");
+	if (opt && (opt = strstr(opt, "memalign:"))) {
+		if (!sscanf(opt, "memalign:%u", &track_memalign))
+			fprintf(stderr, "not an unsigned int: %s\n", opt);
+	}
 	--locating;
 }
 
@@ -461,7 +468,7 @@ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
 		return ENOMEM;
 
 	/* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
-	l = update_stats_rcu_lock(size, caller);
+	l = track_memalign ? update_stats_rcu_lock(size, caller) : 0;
 	real = real_malloc(asize);
 	if (real) {
 		void *p = hdr2ptr(real);
@@ -1008,9 +1015,16 @@ static VALUE src_loc_name(VALUE self)
  * * dump_fd: a writable FD to dump to
  * * dump_path: a path to dump to, the file is opened in O_APPEND mode
  * * dump_min: the minimum allocation size (total) to dump
+ * * memalign: use `1' to enable tracking the memalign family
  *
  * If both `dump_fd' and `dump_path' are specified, dump_path takes
  * precedence.
+ *
+ * Tracking the memalign family of functions is misleading for Ruby
+ * applications, as heap page allocations can happen anywhere a
+ * Ruby object is allocated, even in the coldest code paths.
+ * Furthermore, it is rarely-used outside of the Ruby object allocator.
+ * Thus tracking memalign functions is disabled by default.
  */
 void Init_mwrap(void)
 {
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 18/19] support Mwrap.quiet to temporarily disable allocation tracking
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (16 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 17/19] disable memalign tracking by default Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  2018-07-16 21:19 ` [PATCH 19/19] mwrap_rack: Rack app to track live allocations Eric Wong
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

Tracking memory used for monitoring code itself is noise,
so give users the power to omit it.
---
 ext/mwrap/mwrap.c  | 22 ++++++++++++++++++++++
 test/test_mwrap.rb | 20 ++++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 2752598..c1a59e7 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -30,6 +30,7 @@ extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
 extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
 extern size_t __attribute__((weak)) rb_gc_count(void);
 extern VALUE __attribute__((weak)) rb_cObject;
+extern VALUE __attribute__((weak)) rb_yield(VALUE);
 
 /* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */
 #define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2)
@@ -997,6 +998,26 @@ static VALUE src_loc_name(VALUE self)
 	return ret;
 }
 
+static VALUE reset_locating(VALUE ign) { --locating; return Qfalse; }
+
+/*
+ * call-seq:
+ *
+ *	Mwrap.quiet do |depth|
+ *	  # expensive sort/calculate/emitting results of Mwrap.each
+ *	  # affecting statistics of the rest of the app
+ *	end
+ *
+ * Stops allocation tracking inside the block.  This is useful for
+ * monitoring code which calls other Mwrap (or ObjectSpace/GC)
+ * functions which unavoidably allocate memory.
+ */
+static VALUE mwrap_quiet(VALUE mod)
+{
+	size_t cur = ++locating;
+	return rb_ensure(rb_yield, SIZET2NUM(cur), reset_locating, 0);
+}
+
 /*
  * Document-module: Mwrap
  *
@@ -1040,6 +1061,7 @@ void Init_mwrap(void)
 	rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
 	rb_define_singleton_method(mod, "each", mwrap_each, -1);
 	rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
+	rb_define_singleton_method(mod, "quiet", mwrap_quiet, 0);
 	rb_define_method(cSrcLoc, "each", src_loc_each, 0);
 	rb_define_method(cSrcLoc, "frees", src_loc_frees, 0);
 	rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0);
diff --git a/test/test_mwrap.rb b/test/test_mwrap.rb
index bc8694e..8425c35 100644
--- a/test/test_mwrap.rb
+++ b/test/test_mwrap.rb
@@ -252,4 +252,24 @@ class TestMwrap < Test::Unit::TestCase
       loc.name.frozen? or abort 'SourceLocation#name not frozen'
     end;
   end
+
+  def test_quiet
+    assert_separately(+"#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
+      require 'mwrap'
+      before = __LINE__
+      res = Mwrap.quiet do |depth|
+        depth == 1 or abort 'depth is not 1'
+        ('a' * 10000).clear
+        Mwrap.quiet { |d| d == 2 or abort 'depth is not 2' }
+        :foo
+      end
+      after = __LINE__ - 1
+      (before..after).each do |lineno|
+        Mwrap["#{__FILE__}:#{lineno}"] and
+          abort "unexpectedly tracked allocation at line #{lineno}"
+      end
+      res == :foo or abort 'Mwrap.quiet did not return block result'
+    end;
+  end
 end
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 19/19] mwrap_rack: Rack app to track live allocations
  2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
                   ` (17 preceding siblings ...)
  2018-07-16 21:19 ` [PATCH 18/19] support Mwrap.quiet to temporarily disable allocation tracking Eric Wong
@ 2018-07-16 21:19 ` Eric Wong
  18 siblings, 0 replies; 20+ messages in thread
From: Eric Wong @ 2018-07-16 21:19 UTC (permalink / raw)
  To: mwrap-public

Might as well be able to see and inspect what allocations
are alive when developing a Rack application.

Demo is available at https://80x24.org/MWRAP/each/2000 (note:
"MWRAP" is capitalized).  This is also running "repobrowse",
which will eventually replace cgit on 80x24.org, but is NOT
running the main HTTPS termination at https://80x24.org/

Note: this demo machine is 32-bit, so yes, it will overflow
---
 lib/mwrap_rack.rb | 105 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 lib/mwrap_rack.rb

diff --git a/lib/mwrap_rack.rb b/lib/mwrap_rack.rb
new file mode 100644
index 0000000..ef3872b
--- /dev/null
+++ b/lib/mwrap_rack.rb
@@ -0,0 +1,105 @@
+# Copyright (C) 2018 all contributors <mwrap@80x24.org>
+# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+# frozen_string_literal: true
+require 'mwrap'
+require 'rack'
+require 'cgi'
+
+# Usage: mwrap rackup ...
+class MwrapRack
+  module HtmlResponse
+    def response
+      [ 200, {
+          'Expires' => 'Fri, 01 Jan 1980 00:00:00 GMT',
+          'Pragma' => 'no-cache',
+          'Cache-Control' => 'no-cache, max-age=0, must-revalidate',
+          'Content-Type' => 'text/html; charset=UTF-8',
+        }, self ]
+    end
+  end
+
+  class Each < Struct.new(:script_name, :min, :sort)
+    include HtmlResponse
+    HEADER = '<tr><th>' + %w(total allocations frees mean_life max_life
+                location).join('</th><th>') + '</th></tr>'
+    FIELDS = %w(total allocations frees mean_life max_life location)
+    def each
+      Mwrap.quiet do
+        t = -"Mwrap.each(#{min})"
+        sn = script_name
+        all = []
+        f = FIELDS.dup
+        sc = FIELDS.index(sort || 'total') || 0
+        f[sc] = -"<b>#{f[sc]}</b>"
+        f.map! do |hdr|
+          if hdr.start_with?('<b>')
+            hdr
+          else
+            -%Q(<a\nhref="#{sn}/each/#{min}?sort=#{hdr}">#{hdr}</a>)
+          end
+        end
+        Mwrap.each(min) do |loc, total, allocations, frees, age_sum, max_life|
+          mean_life = frees == 0 ? Float::INFINITY : age_sum/frees.to_f
+          all << [total,allocations,frees,mean_life,max_life,loc]
+        end
+        all.sort_by! { |cols| -cols[sc] }
+
+        yield(-"<html><head><title>#{t}</title></head>" \
+               "<body><h1>#{t}</h1>\n" \
+               "<h2>Current generation: #{GC.count}</h2>\n<table>\n" \
+               "<tr><th>#{f.join('</th><th>')}</th></tr>\n")
+        all.each do |cols|
+          loc = cols.pop
+          cols[3] = sprintf('%0.3f', cols[3]) # mean_life
+          href = -(+"#{sn}/at/#{CGI.escape(loc)}").encode!(xml: :attr)
+          yield(%Q(<tr><td>#{cols.join('</td><td>')}<td><a\nhref=#{
+                  href}>#{-loc.encode(xml: :text)}</a></td></tr>\n))
+          cols.clear
+        end.clear
+        yield "</table></body></html>\n"
+      end
+    end
+  end
+
+  class EachAt < Struct.new(:loc)
+    include HtmlResponse
+    HEADER = '<tr><th>size</th><th>generation</th></tr>'
+
+    def each
+      t = loc.name.encode(xml: :text)
+      yield(-"<html><head><title>#{t}</title></head>" \
+             "<body><h1>live allocations at #{t}</h1>" \
+             "<h2>Current generation: #{GC.count}</h2>\n<table>#{HEADER}")
+      loc.each do |size, generation|
+        yield("<tr><td>#{size}</td><td>#{generation}</td></tr>\n")
+      end
+      yield "</table></body></html>\n"
+    end
+  end
+
+  def r404
+    [404,{'Content-Type'=>'text/plain'},["Not found\n"]]
+  end
+
+  def call(env)
+    case env['PATH_INFO']
+    when %r{\A/each/(\d+)\z}
+      min = $1.to_i
+      m = env['QUERY_STRING'].match(/\bsort=(\w+)/)
+      Each.new(env['SCRIPT_NAME'], min, m ? m[1] : nil).response
+    when %r{\A/at/(.*)\z}
+      loc = -CGI.unescape($1)
+      loc = Mwrap[loc] or return r404
+      EachAt.new(loc).response
+    when '/'
+      n = 2000
+      u = 'https://80x24.org/mwrap/README.html'
+      b = -('<html><head><title>Mwrap demo</title></head>' \
+          "<body><p><a href=\"each/#{n}\">allocations &gt;#{n} bytes</a>" \
+          "<p><a href=\"#{u}\">#{u}</a></body></html>\n")
+      [ 200, {'Content-Type'=>'text/html','Content-Length'=>-b.size.to_s},[b]]
+    else
+      r404
+    end
+  end
+end
-- 
EW


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, back to index

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-16 21:19 [PATCH 0/19] the heavy version of mwrap Eric Wong
2018-07-16 21:19 ` [PATCH 01/19] support per-allocation headers for per-alloc tracking Eric Wong
2018-07-16 21:19 ` [PATCH 02/19] mwrap: use malloc to do our own memalign Eric Wong
2018-07-16 21:19 ` [PATCH 03/19] hold RCU read lock to insert each allocation Eric Wong
2018-07-16 21:19 ` [PATCH 04/19] realloc: do not copy if allocation failed Eric Wong
2018-07-16 21:19 ` [PATCH 05/19] internal_memalign: do not assume real_malloc succeeds Eric Wong
2018-07-16 21:19 ` [PATCH 06/19] ensure ENOMEM is preserved in errno when appropriate Eric Wong
2018-07-16 21:19 ` [PATCH 07/19] memalign: check alignment on all public functions Eric Wong
2018-07-16 21:19 ` [PATCH 08/19] reduce stack usage from file names Eric Wong
2018-07-16 21:19 ` [PATCH 09/19] resolve real_malloc earlier for C++ programs Eric Wong
2018-07-16 21:19 ` [PATCH 10/19] allow analyzing live allocations via Mwrap[location] Eric Wong
2018-07-16 21:19 ` [PATCH 11/19] alias Mwrap.clear to Mwrap.reset Eric Wong
2018-07-16 21:19 ` [PATCH 12/19] implement accessors for SourceLocation Eric Wong
2018-07-16 21:19 ` [PATCH 13/19] mwrap_aref: quiet -Wshorten-64-to-32 warning Eric Wong
2018-07-16 21:19 ` [PATCH 14/19] fixes for FreeBSD 11.1 Eric Wong
2018-07-16 21:19 ` [PATCH 15/19] use memrchr to extract address under glibc Eric Wong
2018-07-16 21:19 ` [PATCH 16/19] do not track allocations for constructor and Init_ Eric Wong
2018-07-16 21:19 ` [PATCH 17/19] disable memalign tracking by default Eric Wong
2018-07-16 21:19 ` [PATCH 18/19] support Mwrap.quiet to temporarily disable allocation tracking Eric Wong
2018-07-16 21:19 ` [PATCH 19/19] mwrap_rack: Rack app to track live allocations Eric Wong

mwrap user+dev discussion/patches/pulls/bugs/help

Archives are clonable:
	git clone --mirror https://80x24.org/mwrap-public/0 mwrap-public/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 mwrap-public mwrap-public/ https://80x24.org/mwrap-public \
		mwrap-public@80x24.org
	public-inbox-index mwrap-public

Example config snippet for mirrors

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.lang.ruby.mwrap
	nntp://ou63pmih66umazou.onion/inbox.comp.lang.ruby.mwrap

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox