From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-2.6 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, UNWANTED_LANGUAGE_BODY shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id C518F1F454 for ; Thu, 31 Oct 2019 20:03:23 +0000 (UTC) From: Eric Wong To: mwrap-perl@80x24.org Subject: [PATCH] port to Perl5 and XS Date: Thu, 31 Oct 2019 20:03:23 +0000 Message-Id: <20191031200323.4156-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: I mainly use Perl5 (again :P), and sometimes tracking down where malloc calls happen is necessary. I don't know of any malloc wrapper interface which is aware of Perl source locations. Valgrind and similar tools can only figure out C source locations, which isn't very useful when hacking in Perl. --- .document | 2 - .gitignore | 12 +- .olddoc.yml | 8 - MANIFEST | 18 +- Makefile.PL | 72 ++ Mwrap.xs | 891 +++++++++++++++++++++ README | 86 +- Rakefile | 16 - bin/mwrap | 36 - ext/mwrap/extconf.rb | 28 - ext/mwrap/mwrap.c | 1464 ---------------------------------- ext/mwrap/jhash.h => jhash.h | 0 lib/Devel/Mwrap.pm | 15 + lib/mwrap_rack.rb | 172 ---- mwrap.gemspec | 32 - script/mwrap-perl | 34 + t/mwrap.t | 85 ++ t/source_location.perl | 9 + test/test_mwrap.rb | 322 -------- typemap | 4 + 20 files changed, 1163 insertions(+), 2143 deletions(-) delete mode 100644 .document delete mode 100644 .olddoc.yml create mode 100644 Makefile.PL create mode 100644 Mwrap.xs delete mode 100644 Rakefile delete mode 100755 bin/mwrap delete mode 100644 ext/mwrap/extconf.rb delete mode 100644 ext/mwrap/mwrap.c rename ext/mwrap/jhash.h => jhash.h (100%) create mode 100644 lib/Devel/Mwrap.pm delete mode 100644 lib/mwrap_rack.rb delete mode 100644 mwrap.gemspec create mode 100644 script/mwrap-perl create mode 100644 t/mwrap.t create mode 100644 t/source_location.perl delete mode 100644 test/test_mwrap.rb create mode 100644 typemap diff --git a/.document b/.document deleted file mode 100644 index 4ca33e3..0000000 --- a/.document +++ /dev/null @@ -1,2 +0,0 @@ -ext/mwrap/mwrap.c -lib/mwrap_rack.rb diff --git a/.gitignore b/.gitignore index aa3606c..81948b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ -/tmp *.o *.so -/pkg -/*.gem -/doc +/MYMETA. +/MYMETA.* +/MANIFEST.gen +/Makefile +/Mwrap.bs +/Mwrap.c +/blib +/pm_to_blib diff --git a/.olddoc.yml b/.olddoc.yml deleted file mode 100644 index dac0353..0000000 --- a/.olddoc.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- -cgit_url: https://80x24.org/mwrap.git -git_url: https://80x24.org/mwrap.git -rdoc_url: https://80x24.org/mwrap/ -ml_url: https://80x24.org/mwrap-public/ -public_email: mwrap-public@80x24.org -nntp_url: - - nntp://news.public-inbox.org/inbox.comp.lang.ruby.mwrap diff --git a/MANIFEST b/MANIFEST index e6d8964..2fa42b1 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,14 +1,12 @@ -.document .gitignore -.olddoc.yml COPYING MANIFEST +Makefile.PL +Mwrap.xs README -Rakefile -bin/mwrap -ext/mwrap/extconf.rb -ext/mwrap/jhash.h -ext/mwrap/mwrap.c -lib/mwrap_rack.rb -mwrap.gemspec -test/test_mwrap.rb +jhash.h +lib/Devel/Mwrap.pm +script/mwrap-perl +t/mwrap.t +t/source_location.perl +typemap diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..1ae3080 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,72 @@ +use strict; +use ExtUtils::MakeMaker; +use Config; +my $pkg_config = $ENV{PKG_CONFIG} // 'pkg-config'; +my $LIBS = `$pkg_config --libs liburcu-cds liburcu-bp`; +if ($?) { + print STDERR < +before you can build Devel::Mwrap. + +On Debian: + + apt-get install pkg-config liburcu-dev +END + # tell CPAN testing to indicate missing deps + exit 0; +} + +if ($Config{usemymalloc} eq 'y') { + print STDERR < 'Devel::Mwrap', + VERSION_FROM => 'lib/Devel/Mwrap.pm', + PREREQ_PM => {}, + ABSTRACT_FROM => 'lib/Devel/Mwrap.pm', + EXE_FILES => [qw(script/mwrap-perl)], + AUTHOR => 'mwrap hackers ', + LIBS => $LIBS, # e.g. -lurcu-cds + LICENSE => 'gpl_2', # GPL-2.0+, CPAN::Meta::Spec limitation + MIN_PERL_VERSION => '5.14.0', # for caller_cx + BUILD_REQUIRES => {}, + CCFLAGS => $CCFLAGS, # e.g -I/usr/include/$ARCH + INC => $INC, + depend => { + Makefile => 'lib/Devel/Mwrap.pm', + } +); + +WriteMakefile(@writemakefile_args); + +sub MY::postamble { + </dev/null || gnproc 2>/dev/null || echo 2) + 1 )) +-include config.mak + +check-manifest :: MANIFEST + if git ls-files >\$?.gen 2>&1; then diff -u \$? \$?.gen; fi + +check:: all check-manifest + PERL5LIB=blib/lib:blib/arch prove -vw -j\$(N) +EOF +} diff --git a/Mwrap.xs b/Mwrap.xs new file mode 100644 index 0000000..f196b1a --- /dev/null +++ b/Mwrap.xs @@ -0,0 +1,891 @@ +/* + * Copyright (C) 2018-2019 mwrap hackers + * License: GPL-2.0+ + * Disclaimer: I don't really know my way around XS or Perl internals well + */ +#define _LGPL_SOURCE /* allows URCU to inline some stuff */ +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#include "embed.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "jhash.h" + +static size_t total_bytes_inc, total_bytes_dec; + +extern pthread_key_t __attribute__((weak)) PL_thr_key; + +/* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */ +#define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2) + +#ifdef __FreeBSD__ +void *__malloc(size_t); +void __free(void *); +# define real_malloc __malloc +# define real_free __free +#else +static void *(*real_malloc)(size_t); +static void (*real_free)(void *); +static int resolving_malloc; +#endif /* !FreeBSD */ + +/* + * we need to fake an OOM condition while dlsym is running, + * as that calls calloc under glibc, but we don't have the + * symbol for the jemalloc calloc, yet + */ +# define RETURN_IF_NOT_READY() do { \ + if (!real_malloc) { \ + errno = ENOMEM; \ + return NULL; \ + } \ +} while (0) + +static __thread size_t locating; +static size_t page_size; +static struct cds_lfht *totals; +union padded_mutex { + pthread_mutex_t mtx; + char pad[64]; +}; + +/* a round-robin pool of mutexes */ +#define MUTEX_NR (1 << 6) +#define MUTEX_MASK (MUTEX_NR - 1) +static size_t mutex_i; +static union padded_mutex mutexes[MUTEX_NR] = { + [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER +}; + +static pthread_mutex_t *mutex_assign(void) +{ + return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx; +} + +static struct cds_lfht * +lfht_new(void) +{ + return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0); +} + +__attribute__((constructor)) static void resolve_malloc(void) +{ + int err; + ++locating; + +#ifdef __FreeBSD__ + /* + * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization, + * which happens at pthread_mutex_lock, and that calls calloc + */ + { + size_t i; + + for (i = 0; i < MUTEX_NR; i++) { + err = pthread_mutex_init(&mutexes[i].mtx, 0); + if (err) { + fprintf(stderr, "error: %s\n", strerror(err)); + _exit(1); + } + } + /* initialize mutexes used by urcu-bp */ + rcu_read_lock(); + rcu_read_unlock(); + } +#else /* !FreeBSD (tested on GNU/Linux) */ + if (!real_malloc) { + resolving_malloc = 1; + real_malloc = dlsym(RTLD_NEXT, "malloc"); + } + real_free = dlsym(RTLD_NEXT, "free"); + if (!real_malloc || !real_free) { + fprintf(stderr, "missing malloc/aligned_alloc/free\n" + "\t%p %p\n", real_malloc, real_free); + _exit(1); + } +#endif /* !FreeBSD */ + err = pthread_atfork(call_rcu_before_fork, + call_rcu_after_fork_parent, + call_rcu_after_fork_child); + if (err) + fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err)); + page_size = sysconf(_SC_PAGESIZE); + --locating; +} + +static void +mutex_lock(pthread_mutex_t *m) +{ + int err = pthread_mutex_lock(m); + assert(err == 0); +} + +static void +mutex_unlock(pthread_mutex_t *m) +{ + int err = pthread_mutex_unlock(m); + assert(err == 0); +} + +#ifndef HAVE_MEMPCPY +static void * +my_mempcpy(void *dest, const void *src, size_t n) +{ + return (char *)memcpy(dest, src, n) + n; +} +#define mempcpy(dst,src,n) my_mempcpy(dst,src,n) +#endif + +/* stolen from glibc: */ +#define RETURN_ADDRESS(nr) \ + (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr))) + +#define INT2STR_MAX (sizeof(unsigned) == 4 ? 10 : 19) +static char *int2str(unsigned num, char *dst, size_t * size) +{ + if (num <= 9) { + *size -= 1; + *dst++ = (char)(num + '0'); + return dst; + } else { + char buf[INT2STR_MAX]; + char *end = buf + sizeof(buf); + char *p = end; + size_t adj; + + do { + *size -= 1; + *--p = (char)((num % 10) + '0'); + num /= 10; + } while (num && *size); + + if (!num) { + adj = end - p; + return mempcpy(dst, p, adj); + } + } + return NULL; +} + +/* allocated via real_malloc/real_free */ +struct src_loc { + pthread_mutex_t *mtx; + size_t total; + size_t allocations; + size_t frees; + struct cds_lfht_node hnode; + struct cds_list_head allocs; /* <=> alloc_hdr.node */ + uint32_t hval; + uint32_t capa; + char k[]; +}; + +/* + * I hate typedefs, especially when they're hiding the fact that there's + * a pointer, but XS needs this, apparently, and it does s/__/::/g + */ +typedef struct src_loc * Devel__Mwrap__SrcLoc; + +/* every allocation has this in the header, maintain alignment with malloc */ +struct alloc_hdr { + struct cds_list_head anode; /* <=> src_loc.allocs */ + union { + struct { + struct src_loc *loc; + } live; + struct rcu_head dead; + } as; + void *real; /* what to call real_free on */ + size_t size; +}; + +static __thread char kbuf[ + PATH_MAX + INT2STR_MAX + sizeof(struct alloc_hdr) + 2 +]; + +static struct alloc_hdr *ptr2hdr(void *p) +{ + return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr)); +} + +static void *hdr2ptr(struct alloc_hdr *h) +{ + return (void *)((uintptr_t)h + sizeof(struct alloc_hdr)); +} + +static int loc_is_addr(const struct src_loc *l) +{ + return l->capa == 0; +} + +static size_t loc_size(const struct src_loc *l) +{ + return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa; +} + +static int loc_eq(struct cds_lfht_node *node, const void *key) +{ + const struct src_loc *existing; + const struct src_loc *k = key; + + existing = caa_container_of(node, struct src_loc, hnode); + + return (k->hval == existing->hval && + k->capa == existing->capa && + memcmp(k->k, existing->k, loc_size(k)) == 0); +} + +static struct src_loc *totals_add_rcu(struct src_loc *k) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *cur; + struct src_loc *l = 0; + struct cds_lfht *t; + +again: + t = rcu_dereference(totals); + if (!t) goto out_unlock; + cds_lfht_lookup(t, k->hval, loc_eq, k, &iter); + cur = cds_lfht_iter_get_node(&iter); + if (cur) { + l = caa_container_of(cur, struct src_loc, hnode); + uatomic_add(&l->total, k->total); + uatomic_add(&l->allocations, 1); + } else { + size_t n = loc_size(k); + l = real_malloc(sizeof(*l) + n); + if (!l) goto out_unlock; + memcpy(l, k, sizeof(*l) + n); + l->mtx = mutex_assign(); + l->frees = 0; + l->allocations = 1; + CDS_INIT_LIST_HEAD(&l->allocs); + cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode); + if (cur != &l->hnode) { /* lost race */ + rcu_read_unlock(); + real_free(l); + rcu_read_lock(); + goto again; + } + } +out_unlock: + return l; +} + +static void update_stats_rcu_unlock(const struct src_loc *l) +{ + if (caa_likely(l)) rcu_read_unlock(); +} + +static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller) +{ + const PERL_CONTEXT *cx = NULL; + static const size_t xlen = sizeof(caller); + struct src_loc *k, *ret = 0; + char *dst; + + if (caa_unlikely(!totals)) return 0; + if (locating++) goto out; /* do not recurse into another *alloc */ + + uatomic_add(&total_bytes_inc, size); + + rcu_read_lock(); + cx = caller_cx(0, NULL); + if (cx) { + const char *ptr = OutCopFILE(cx->blk_oldcop); + const COP *lcop; + unsigned line; + size_t len; + size_t int_size = INT2STR_MAX; + + if (!ptr) goto unknown; + + lcop = Perl_closest_cop(aTHX_ cx->blk_oldcop, + OpSIBLING(cx->blk_oldcop), + cx->blk_sub.retop, TRUE); + if (!lcop) + lcop = cx->blk_oldcop; + line = CopLINE(lcop); + + /* avoid vsnprintf or anything which could call malloc here: */ + len = strlen(ptr); + if (len > PATH_MAX) + len = PATH_MAX; + k = (void *)kbuf; + k->total = size; + dst = mempcpy(k->k, ptr, len); + *dst++ = ':'; + + if (line == UINT_MAX) /* no line number */ + *dst++ = '-'; + else + dst = int2str(line, dst, &int_size); + + assert(dst && "bad math"); + *dst = 0; /* terminate string */ + k->capa = (uint32_t)(dst - k->k + 1); + k->hval = jhash(k->k, k->capa, 0xdeadbeef); + ret = totals_add_rcu(k); + } else { +unknown: + k = alloca(sizeof(*k) + xlen); + k->total = size; + memcpy(k->k, &caller, xlen); + k->capa = 0; + k->hval = jhash(k->k, xlen, 0xdeadbeef); + ret = totals_add_rcu(k); + } +out: + --locating; + return ret; +} + +size_t malloc_usable_size(void *p) +{ + return ptr2hdr(p)->size; +} + +static void +free_hdr_rcu(struct rcu_head *dead) +{ + struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead); + real_free(h->real); +} + +void free(void *p) +{ + if (p) { + struct alloc_hdr *h = ptr2hdr(p); + struct src_loc *l = h->as.live.loc; + + if (!real_free) return; /* oh well, leak a little */ + if (l) { + uatomic_add(&total_bytes_dec, h->size); + uatomic_set(&h->size, 0); + uatomic_add(&l->frees, 1); + + mutex_lock(l->mtx); + cds_list_del_rcu(&h->anode); + mutex_unlock(l->mtx); + + call_rcu(&h->as.dead, free_hdr_rcu); + } else { + real_free(h->real); + } + } +} + +static void +alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real) +{ + /* we need src_loc to remain alive for the duration of this call */ + if (!h) return; + h->size = size; + h->real = real; + h->as.live.loc = l; + if (l) { + mutex_lock(l->mtx); + cds_list_add_rcu(&h->anode, &l->allocs); + mutex_unlock(l->mtx); + } +} + +static size_t size_align(size_t size, size_t alignment) +{ + return ((size + (alignment - 1)) & ~(alignment - 1)); +} + +static bool ptr_is_aligned(const void *ptr, size_t alignment) +{ + return ((uintptr_t)ptr & (alignment - 1)) == 0; +} + +static void *ptr_align(void *ptr, size_t alignment) +{ + return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1)); +} + +static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; } + +static int +internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller) +{ + struct src_loc *l; + struct alloc_hdr *h; + void *real; + size_t asize; + size_t d = alignment / sizeof(void*); + size_t r = alignment % sizeof(void*); + + if (!real_malloc) return ENOMEM; + + if (r != 0 || d == 0 || !is_power_of_two(d)) + return EINVAL; + + if (alignment <= ASSUMED_MALLOC_ALIGNMENT) { + void *p = malloc(size); + if (!p) return ENOMEM; + *pp = p; + return 0; + } + for (; alignment < sizeof(struct alloc_hdr); alignment *= 2) + ; /* double alignment until >= sizeof(struct alloc_hdr) */ + if (__builtin_add_overflow(size, alignment, &asize) || + __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize)) + return ENOMEM; + + l = update_stats_rcu_lock(size, caller); + + real = real_malloc(asize); + if (real) { + void *p = hdr2ptr(real); + if (!ptr_is_aligned(p, alignment)) + p = ptr_align(p, alignment); + h = ptr2hdr(p); + alloc_insert_rcu(l, h, size, real); + update_stats_rcu_unlock(l); + *pp = p; + } + + return real ? 0 : ENOMEM; +} + +static void * +memalign_result(int err, void *p) +{ + if (caa_unlikely(err)) { + errno = err; + return 0; + } + return p; +} + +void *memalign(size_t alignment, size_t size) +{ + void *p; + int err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0)); + return memalign_result(err, p); +} + +int posix_memalign(void **p, size_t alignment, size_t size) +{ + return internal_memalign(p, alignment, size, RETURN_ADDRESS(0)); +} + +void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign"))); +void cfree(void *) __attribute__((alias("free"))); + +void *valloc(size_t size) +{ + void *p; + int err = internal_memalign(&p, page_size, size, RETURN_ADDRESS(0)); + return memalign_result(err, p); +} + +#if __GNUC__ < 7 +# define add_overflow_p(a,b) __extension__({ \ + __typeof__(a) _c; \ + __builtin_add_overflow(a,b,&_c); \ + }) +#else +# define add_overflow_p(a,b) \ + __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0) +#endif + +void *pvalloc(size_t size) +{ + size_t alignment = page_size; + void *p; + int err; + + if (add_overflow_p(size, alignment)) { + errno = ENOMEM; + return 0; + } + size = size_align(size, alignment); + err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0)); + return memalign_result(err, p); +} + +void *malloc(size_t size) +{ + struct src_loc *l; + struct alloc_hdr *h; + size_t asize; + void *p; + + if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) + goto enomem; + + /* + * Needed for C++ global declarations using "new", + * which happens before our constructor + */ +#ifndef __FreeBSD__ + if (!real_malloc) { + if (resolving_malloc) goto enomem; + resolving_malloc = 1; + real_malloc = dlsym(RTLD_NEXT, "malloc"); + } +#endif + l = update_stats_rcu_lock(size, RETURN_ADDRESS(0)); + p = h = real_malloc(asize); + if (h) { + alloc_insert_rcu(l, h, size, h); + p = hdr2ptr(h); + } + update_stats_rcu_unlock(l); + if (caa_unlikely(!p)) errno = ENOMEM; + return p; +enomem: + errno = ENOMEM; + return 0; +} + +void *calloc(size_t nmemb, size_t size) +{ + void *p; + struct src_loc *l; + struct alloc_hdr *h; + size_t asize; + + if (__builtin_mul_overflow(size, nmemb, &size)) { + errno = ENOMEM; + return 0; + } + if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) { + errno = ENOMEM; + return 0; + } + RETURN_IF_NOT_READY(); + l = update_stats_rcu_lock(size, RETURN_ADDRESS(0)); + p = h = real_malloc(asize); + if (p) { + alloc_insert_rcu(l, h, size, h); + p = hdr2ptr(h); + memset(p, 0, size); + } + update_stats_rcu_unlock(l); + if (caa_unlikely(!p)) errno = ENOMEM; + return p; +} + +void *realloc(void *ptr, size_t size) +{ + void *p; + struct src_loc *l; + struct alloc_hdr *h; + size_t asize; + + if (!size) { + free(ptr); + return 0; + } + if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) { + errno = ENOMEM; + return 0; + } + RETURN_IF_NOT_READY(); + + l = update_stats_rcu_lock(size, RETURN_ADDRESS(0)); + p = h = real_malloc(asize); + if (p) { + alloc_insert_rcu(l, h, size, h); + p = hdr2ptr(h); + } + update_stats_rcu_unlock(l); + + if (ptr && p) { + struct alloc_hdr *old = ptr2hdr(ptr); + memcpy(p, ptr, old->size < size ? old->size : size); + free(ptr); + } + if (caa_unlikely(!p)) errno = ENOMEM; + return p; +} + +struct dump_arg { + FILE *fp; + size_t min; +}; + +static void *dump_to_file(struct dump_arg *a) +{ + struct cds_lfht_iter iter; + struct src_loc *l; + struct cds_lfht *t; + + ++locating; + rcu_read_lock(); + t = rcu_dereference(totals); + if (!t) + goto out_unlock; + cds_lfht_for_each_entry(t, &iter, l, hnode) { + const void *p = l->k; + char **s = 0; + if (l->total <= a->min) continue; + + if (loc_is_addr(l)) { + s = backtrace_symbols(p, 1); + p = s[0]; + } + fprintf(a->fp, "%16zu %12zu %s\n", + l->total, l->allocations, (const char *)p); + if (s) free(s); + } +out_unlock: + rcu_read_unlock(); + --locating; + return 0; +} + +static SV *location_string(struct src_loc *l) +{ + SV *ret; + + if (loc_is_addr(l)) { + char **s = backtrace_symbols((void *)l->k, 1); + + ret = newSVpvn(s[0], strlen(s[0])); + } + else { + ret = newSVpvn(l->k, l->capa - 1); + } + + return ret; +} + +static int +extract_addr(const char *str, size_t len, void **p) +{ + const char *c; +#if defined(__GLIBC__) + return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p)); +#else /* TODO: test FreeBSD */ + return ((c = strstr(str, "0x")) && sscanf(c, "%p", p)); +#endif +} + +#ifndef O_CLOEXEC +# define O_CLOEXEC 0 +#endif +__attribute__ ((destructor)) +static void dump_destructor(void) +{ + const char *opt = getenv("MWRAP"); + const char *modes[] = { "a", "a+", "w", "w+", "r+" }; + struct dump_arg a = { .min = 0 }; + size_t i; + int dump_fd; + char *dump_path; + char *s; + + if (!opt) + return; + + ++locating; + if ((dump_path = strstr(opt, "dump_path:")) && + (dump_path += sizeof("dump_path")) && + *dump_path) { + char *end = strchr(dump_path, ','); + if (end) { + char *tmp = alloca(end - dump_path + 1); + end = mempcpy(tmp, dump_path, end - dump_path); + *end = 0; + dump_path = tmp; + } + dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT, + 0666); + if (dump_fd < 0) { + fprintf(stderr, "open %s failed: %s\n", dump_path, + strerror(errno)); + goto out; + } + } + else if (!sscanf(opt, "dump_fd:%d", &dump_fd)) + goto out; + + if ((s = strstr(opt, "dump_min:"))) + sscanf(s, "dump_min:%zu", &a.min); + + switch (dump_fd) { + case 0: goto out; + case 1: a.fp = stdout; break; + case 2: a.fp = stderr; break; + default: + if (dump_fd < 0) + goto out; + a.fp = 0; + + for (i = 0; !a.fp && i < 5; i++) + a.fp = fdopen(dump_fd, modes[i]); + + if (!a.fp) { + fprintf(stderr, "failed to open fd=%d: %s\n", + dump_fd, strerror(errno)); + goto out; + } + /* we'll leak some memory here, but this is a destructor */ + } + dump_to_file(&a); +out: + --locating; +} + +MODULE = Devel::Mwrap PACKAGE = Devel::Mwrap PREFIX = mwrap_ + +BOOT: + totals = lfht_new(); + if (!totals) + fprintf(stderr, "failed to allocate totals table\n"); + +PROTOTYPES: ENABLE + +size_t +mwrap_total_bytes_allocated() +CODE: + RETVAL = total_bytes_inc; +OUTPUT: + RETVAL + +size_t +mwrap_total_bytes_freed() +CODE: + RETVAL = total_bytes_dec; +OUTPUT: + RETVAL + +void +mwrap_reset() +PREINIT: + struct cds_lfht *t; + struct cds_lfht_iter iter; + struct src_loc *l; +CODE: + uatomic_set(&total_bytes_inc, 0); + uatomic_set(&total_bytes_dec, 0); + + rcu_read_lock(); + t = rcu_dereference(totals); + cds_lfht_for_each_entry(t, &iter, l, hnode) { + uatomic_set(&l->total, 0); + uatomic_set(&l->allocations, 0); + uatomic_set(&l->frees, 0); + } + rcu_read_unlock(); + +Devel::Mwrap::SrcLoc +mwrap_get(loc) + SV *loc; +PREINIT: + STRLEN len; + const char *str; + struct src_loc *k = 0; + uintptr_t p; + struct cds_lfht_iter iter; + struct cds_lfht_node *cur; + struct cds_lfht *t; + struct src_loc *l = NULL; + ++locating; +CODE: + if (!SvPOK(loc)) + XSRETURN_UNDEF; + str = SvPV(loc, len); + if (len > PATH_MAX) + XSRETURN_UNDEF; + if (extract_addr(str, len, (void **)&p)) { + k = (void *)kbuf; + memcpy(k->k, &p, sizeof(p)); + k->capa = 0; + k->hval = jhash(k->k, sizeof(p), 0xdeadbeef); + } else { + k = (void *)kbuf; + memcpy(k->k, str, len + 1); + k->capa = len + 1; + k->hval = jhash(k->k, k->capa, 0xdeadbeef); + } + + if (!k) + XSRETURN_UNDEF; + + rcu_read_lock(); + t = rcu_dereference(totals); + if (!t) goto out_unlock; + + cds_lfht_lookup(t, k->hval, loc_eq, k, &iter); + cur = cds_lfht_iter_get_node(&iter); + if (cur) + l = caa_container_of(cur, struct src_loc, hnode); +out_unlock: + rcu_read_unlock(); + RETVAL = l; +OUTPUT: + RETVAL +CLEANUP: + --locating; + +MODULE = Devel::Mwrap PACKAGE = Devel::Mwrap::SrcLoc PREFIX = src_loc_ + +PROTOTYPES: ENABLE + +size_t +src_loc_frees(self) + Devel::Mwrap::SrcLoc self +PREINIT: + ++locating; +CODE: + RETVAL = uatomic_read(&self->frees); +OUTPUT: + RETVAL +CLEANUP: + --locating; + +size_t +src_loc_allocations(self) + Devel::Mwrap::SrcLoc self +PREINIT: + ++locating; +CODE: + RETVAL = uatomic_read(&self->allocations); +OUTPUT: + RETVAL +CLEANUP: + --locating; + +size_t +src_loc_total(self) + Devel::Mwrap::SrcLoc self +PREINIT: + ++locating; +CODE: + RETVAL = uatomic_read(&self->total); +OUTPUT: + RETVAL +CLEANUP: + --locating; + +SV * +src_loc_name(self) + Devel::Mwrap::SrcLoc self +PREINIT: + ++locating; +CODE: + RETVAL = location_string(self); +OUTPUT: + RETVAL +CLEANUP: + --locating; diff --git a/README b/README index 3a20258..97ff4ea 100644 --- a/README +++ b/README @@ -1,95 +1,83 @@ -= mwrap - LD_PRELOAD malloc wrapper + line stats for Ruby +Devel::Mwrap - LD_PRELOAD malloc wrapper + line stats for Perl -mwrap is designed to answer the question: +Devel::Mwrap is designed to answer the question: - Which lines of Ruby are hitting malloc the most? + Which lines of Perl are hitting malloc the most? -mwrap wraps all malloc-family calls to trace the Ruby source -location of such calls and bytes allocated at each callsite. -As of mwrap 2.0.0, it can also function as a leak detector -and show live allocations at every call site. Depending on -your application and workload, the overhead is roughly a 50% -increase memory and runtime. +Devel::Mwrap wraps all malloc-family calls to trace the Perl source +location of such calls and bytes allocated at each callsite. It +can also function as a leak detector and show live allocations +at every call site. Depending on your application and workload, +the overhead is roughly a 50%-100% increase memory and runtime. -It works best for allocations under GVL, but tries to track -numeric caller addresses for allocations made without GVL so you -can get an idea of how much memory usage certain extensions and -native libraries use. +It is thread-safe and requires the concurrent lock-free hash table +from the Userspace RCU project: https://liburcu.org/ -It requires the concurrent lock-free hash table from the -Userspace RCU project: https://liburcu.org/ +It relies on dynamic linking to a malloc(3) implementation. If +you got Perl from your OS distribution, this typically does not +require rebuilding Perl. -It does not require recompiling or rebuilding Ruby, but only -supports Ruby trunk (2.6.0dev+) on a few platforms: +Tested on the perl package distributed with: -* GNU/Linux -* FreeBSD (tested 11.1) +* Debian GNU/Linux 9, 10 -It may work on NetBSD, OpenBSD and DragonFly BSD. +It may work on FreeBSD, NetBSD, OpenBSD and DragonFly BSD. == Install - # FreeBSD: pkg install liburcu + # FreeBSD: pkg install pkg-config liburcu - # Debian-based systems: apt-get liburcu-dev - - # Install mwrap via RubyGems.org - gem install mwrap + # Debian-based systems: apt-get install pkg-config liburcu-dev == Usage -mwrap works as an LD_PRELOAD and supplies a mwrap RubyGem executable to +Devel::Mwrap works as an LD_PRELOAD and supplies a mwrap-perl script to improve ease-of-use. You can set dump_path: in the MWRAP environment variable to append the results to a log file: - MWRAP=dump_path:/path/to/log mwrap RUBY_COMMAND + MWRAP=dump_path:/path/to/log mwrap-perl PERL_COMMAND # And to display the locations with the most allocations: sort -k1,1rn dump, Devel::Mwrap->reset, Devel::Mwrap->each, etc. -However, mwrap MUST be loaded via LD_PRELOAD to have any +However, Devel::Mwrap MUST be loaded via LD_PRELOAD to have any effect in tracking malloc use. However, it is safe to keep -"require 'mwrap'" in performance-critical deployments, +"use Devel::Mwrap" in performance-critical deployments, as overhead is only incurred when used as an LD_PRELOAD. -The output of the mwrap dump is a text file with 3 columns: +The output of the Devel::Mwrap->dump is a text file with 3 columns: total_bytes call_count location -Where location is a Ruby source location (if made under GVL) -or an address retrieved by backtrace_symbols(3). It is -recommended to use the sort(1) command on either of the -first two columns to find the hottest malloc locations. - -mwrap 2.0.0+ also supports a Rack application endpoint, -it is documented at: - - https://80x24.org/mwrap/MwrapRack.html +Where location is a Perl source location or an address retrieved +by backtrace_symbols(3). It is recommended to use the sort(1) +command on either of the first two columns to find the hottest +malloc locations. == Known problems * 32-bit machines are prone to overflow (WONTFIX) -== Mail archives and list: +== Mail archives and newsgroup: - https://80x24.org/mwrap-public/ - nntp://80x24.org/inbox.comp.lang.ruby.mwrap + https://80x24.org/mwrap-perl/ + nntp://80x24.org/inbox.comp.lang.perl.mwrap No subscription will ever be required to post, but HTML mail will be rejected: - mwrap-public@80x24.org + mwrap-perl@80x24.org == Hacking - git clone https://80x24.org/mwrap.git + git clone https://80x24.org/mwrap-perl.git -Send all patches and pull requests (use "git request-pull" to format) to -the mailing list. We do not use centralized or proprietary messaging -systems. +Send all patches and pull requests (use "git request-pull" to format) +via email to mwrap-perl@80x24.org. We do not and will not use +proprietary messaging systems. == License diff --git a/Rakefile b/Rakefile deleted file mode 100644 index 50bfa89..0000000 --- a/Rakefile +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2018 mwrap hackers -# License: GPL-2.0+ -require 'rake/testtask' -begin - require 'rake/extensiontask' - Rake::ExtensionTask.new('mwrap') -rescue LoadError - warn 'rake-compiler not available, cross compiling disabled' -end - -Rake::TestTask.new(:test) -task :test => :compile -task :default => :compile - -c_files = File.readlines('MANIFEST').grep(%r{ext/.*\.[ch]$}).map!(&:chomp!) -task 'compile:mwrap' => c_files diff --git a/bin/mwrap b/bin/mwrap deleted file mode 100755 index 9f67dab..0000000 --- a/bin/mwrap +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/ruby -# frozen_string_literal: true -# Copyright (C) 2018 mwrap hackers -# License: GPL-2.0+ -require 'mwrap' -mwrap_so = $".grep(%r{/mwrap\.so\z})[0] or abort "mwrap.so not loaded" -cur = ENV['LD_PRELOAD'] -if cur - cur = cur.split(/[:\s]+/) - if !cur.include?(mwrap_so) - # drop old versions - cur.delete_if { |path| path.end_with?('/mwrap.so') } - cur.unshift(mwrap_so) - ENV['LD_PRELOAD'] = cur.join(':') - end -else - ENV['LD_PRELOAD'] = mwrap_so -end - -# work around close-on-exec by default behavior in Ruby: -opts = {} -if ENV['MWRAP'] =~ /dump_fd:(\d+)/ - dump_fd = $1.to_i - if dump_fd > 2 - dump_io = IO.new(dump_fd) - opts[dump_fd] = dump_io - end -end - -# allow inheriting FDs from systemd -n = ENV['LISTEN_FDS'] -if n && ENV['LISTEN_PID'].to_i == $$ - n = 3 + n.to_i - (3...n).each { |fd| opts[fd] = IO.new(fd) } -end -exec *ARGV, opts diff --git a/ext/mwrap/extconf.rb b/ext/mwrap/extconf.rb deleted file mode 100644 index e9dbb1e..0000000 --- a/ext/mwrap/extconf.rb +++ /dev/null @@ -1,28 +0,0 @@ -# frozen_string_literal: true -# Copyright (C) 2018 mwrap hackers -# License: GPL-2.0+ -require 'mkmf' - -have_func 'mempcpy' -have_library 'urcu-cds' or abort 'userspace RCU not installed' -have_header 'urcu/rculfhash.h' or abort 'rculfhash.h not found' -have_library 'urcu-bp' or abort 'liburcu-bp not found' -have_library 'dl' -have_library 'c' -have_library 'execinfo' # FreeBSD - -if try_link(<<'') -int main(void) { return __builtin_add_overflow_p(0,0,(int)1); } - - $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P' -end - -if try_link(<<'') -int main(int a) { return __builtin_add_overflow(0,0,&a); } - - $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P' -else - abort 'missing __builtin_add_overflow' -end - -create_makefile 'mwrap' diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c deleted file mode 100644 index 5174127..0000000 --- a/ext/mwrap/mwrap.c +++ /dev/null @@ -1,1464 +0,0 @@ -/* - * Copyright (C) 2018 mwrap hackers - * License: GPL-2.0+ - */ -#define _LGPL_SOURCE /* allows URCU to inline some stuff */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "jhash.h" - -static ID id_uminus; -const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */ -extern int __attribute__((weak)) ruby_thread_has_gvl_p(void); -extern void * __attribute__((weak)) ruby_current_execution_context_ptr; -extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */ -extern size_t __attribute__((weak)) rb_gc_count(void); -extern VALUE __attribute__((weak)) rb_cObject; -extern VALUE __attribute__((weak)) rb_eTypeError; -extern VALUE __attribute__((weak)) rb_yield(VALUE); - -static size_t total_bytes_inc, total_bytes_dec; - -/* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */ -#define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2) - -/* match values in Ruby gc.c */ -#define HEAP_PAGE_ALIGN_LOG 14 -enum { - HEAP_PAGE_ALIGN = (1UL << HEAP_PAGE_ALIGN_LOG), - REQUIRED_SIZE_BY_MALLOC = (sizeof(size_t) * 5), - HEAP_PAGE_SIZE = (HEAP_PAGE_ALIGN - REQUIRED_SIZE_BY_MALLOC) -}; - -#define IS_HEAP_PAGE_BODY ((struct src_loc *)-1) - -int __attribute__((weak)) ruby_thread_has_gvl_p(void) -{ - return 0; -} - -#ifdef __FreeBSD__ -void *__malloc(size_t); -void __free(void *); -# define real_malloc __malloc -# define real_free __free -#else -static void *(*real_malloc)(size_t); -static void (*real_free)(void *); -static int resolving_malloc; -#endif /* !FreeBSD */ - -/* - * we need to fake an OOM condition while dlsym is running, - * as that calls calloc under glibc, but we don't have the - * symbol for the jemalloc calloc, yet - */ -# define RETURN_IF_NOT_READY() do { \ - if (!real_malloc) { \ - errno = ENOMEM; \ - return NULL; \ - } \ -} while (0) - -static __thread size_t locating; -static size_t generation; -static size_t page_size; -static struct cds_lfht *totals; -union padded_mutex { - pthread_mutex_t mtx; - char pad[64]; -}; - -/* a round-robin pool of mutexes */ -#define MUTEX_NR (1 << 6) -#define MUTEX_MASK (MUTEX_NR - 1) -static size_t mutex_i; -static union padded_mutex mutexes[MUTEX_NR] = { - [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER -}; - -static pthread_mutex_t *mutex_assign(void) -{ - return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx; -} - -static struct cds_lfht * -lfht_new(void) -{ - return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0); -} - -__attribute__((constructor)) static void resolve_malloc(void) -{ - int err; - ++locating; - -#ifdef __FreeBSD__ - /* - * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization, - * which happens at pthread_mutex_lock, and that calls calloc - */ - { - size_t i; - - for (i = 0; i < MUTEX_NR; i++) { - err = pthread_mutex_init(&mutexes[i].mtx, 0); - if (err) { - fprintf(stderr, "error: %s\n", strerror(err)); - _exit(1); - } - } - /* initialize mutexes used by urcu-bp */ - rcu_read_lock(); - rcu_read_unlock(); - } -#else /* !FreeBSD (tested on GNU/Linux) */ - if (!real_malloc) { - resolving_malloc = 1; - real_malloc = dlsym(RTLD_NEXT, "malloc"); - } - real_free = dlsym(RTLD_NEXT, "free"); - if (!real_malloc || !real_free) { - fprintf(stderr, "missing malloc/aligned_alloc/free\n" - "\t%p %p\n", real_malloc, real_free); - _exit(1); - } -#endif /* !FreeBSD */ - totals = lfht_new(); - if (!totals) - fprintf(stderr, "failed to allocate totals table\n"); - - err = pthread_atfork(call_rcu_before_fork, - call_rcu_after_fork_parent, - call_rcu_after_fork_child); - if (err) - fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err)); - page_size = sysconf(_SC_PAGESIZE); - --locating; -} - -static void -mutex_lock(pthread_mutex_t *m) -{ - int err = pthread_mutex_lock(m); - assert(err == 0); -} - -static void -mutex_unlock(pthread_mutex_t *m) -{ - int err = pthread_mutex_unlock(m); - assert(err == 0); -} - -#ifndef HAVE_MEMPCPY -static void * -my_mempcpy(void *dest, const void *src, size_t n) -{ - return (char *)memcpy(dest, src, n) + n; -} -#define mempcpy(dst,src,n) my_mempcpy(dst,src,n) -#endif - -/* stolen from glibc: */ -#define RETURN_ADDRESS(nr) \ - (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr))) - -#define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19) -static char *int2str(int num, char *dst, size_t * size) -{ - if (num <= 9) { - *size -= 1; - *dst++ = (char)(num + '0'); - return dst; - } else { - char buf[INT2STR_MAX]; - char *end = buf + sizeof(buf); - char *p = end; - size_t adj; - - do { - *size -= 1; - *--p = (char)((num % 10) + '0'); - num /= 10; - } while (num && *size); - - if (!num) { - adj = end - p; - return mempcpy(dst, p, adj); - } - } - return NULL; -} - -/* - * rb_source_location_cstr relies on GET_EC(), and it's possible - * to have a native thread but no EC during the early and late - * (teardown) phases of the Ruby process - */ -static int has_ec_p(void) -{ - return (ruby_thread_has_gvl_p() && ruby_current_vm_ptr && - ruby_current_execution_context_ptr); -} - -struct acc { - uint64_t nr; - int64_t min; - int64_t max; - double m2; - double mean; -}; - -#define ACC_INIT(name) { .nr=0, .min=INT64_MAX, .max=-1, .m2=0, .mean=0 } - -/* for tracking 16K-aligned heap page bodies (protected by GVL) */ -struct { - pthread_mutex_t lock; - struct cds_list_head bodies; - struct cds_list_head freed; - - struct acc alive; - struct acc reborn; -} hpb_stats = { - .lock = PTHREAD_MUTEX_INITIALIZER, - .bodies = CDS_LIST_HEAD_INIT(hpb_stats.bodies), - .freed = CDS_LIST_HEAD_INIT(hpb_stats.freed), - .alive = ACC_INIT(hpb_stats.alive), - .reborn = ACC_INIT(hpb_stats.reborn) -}; - -/* allocated via real_malloc/real_free */ -struct src_loc { - pthread_mutex_t *mtx; - size_t total; - size_t allocations; - size_t frees; - size_t age_total; /* (age_total / frees) => mean age at free */ - size_t max_lifespan; - struct cds_lfht_node hnode; - struct cds_list_head allocs; /* <=> alloc_hdr.node */ - uint32_t hval; - uint32_t capa; - char k[]; -}; - -/* every allocation has this in the header, maintain alignment with malloc */ -struct alloc_hdr { - struct cds_list_head anode; /* <=> src_loc.allocs */ - union { - struct { - size_t gen; /* rb_gc_count() */ - struct src_loc *loc; - } live; - struct rcu_head dead; - struct { - size_t at; /* rb_gc_count() */ - } hpb_freed; - } as; - void *real; /* what to call real_free on */ - size_t size; -}; - -static char kbuf[PATH_MAX + INT2STR_MAX + sizeof(struct alloc_hdr) + 2]; - -static struct alloc_hdr *ptr2hdr(void *p) -{ - return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr)); -} - -static void *hdr2ptr(struct alloc_hdr *h) -{ - return (void *)((uintptr_t)h + sizeof(struct alloc_hdr)); -} - -static int loc_is_addr(const struct src_loc *l) -{ - return l->capa == 0; -} - -static size_t loc_size(const struct src_loc *l) -{ - return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa; -} - -static int loc_eq(struct cds_lfht_node *node, const void *key) -{ - const struct src_loc *existing; - const struct src_loc *k = key; - - existing = caa_container_of(node, struct src_loc, hnode); - - return (k->hval == existing->hval && - k->capa == existing->capa && - memcmp(k->k, existing->k, loc_size(k)) == 0); -} - -/* note: not atomic */ -static void -acc_add(struct acc *acc, size_t val) -{ - double delta = val - acc->mean; - uint64_t nr = ++acc->nr; - - /* just don't divide-by-zero if we ever hit this (unlikely :P) */ - if (nr) - acc->mean += delta / nr; - - acc->m2 += delta * (val - acc->mean); - if ((int64_t)val < acc->min) - acc->min = (int64_t)val; - if ((int64_t)val > acc->max) - acc->max = (int64_t)val; -} - -#if SIZEOF_LONG == 8 -# define INT64toNUM(x) LONG2NUM((long)x) -#elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8 -# define INT64toNUM(x) LL2NUM((LONG_LONG)x) -#endif - -static VALUE -acc_max(const struct acc *acc) -{ - return INT64toNUM(acc->max); -} - -static VALUE -acc_min(const struct acc *acc) -{ - return acc->min == INT64_MAX ? INT2FIX(-1) : INT64toNUM(acc->min); -} - -static VALUE -acc_mean(const struct acc *acc) -{ - return DBL2NUM(acc->nr ? acc->mean : HUGE_VAL); -} - -static double -acc_stddev_dbl(const struct acc *acc) -{ - if (acc->nr > 1) { - double variance = acc->m2 / (acc->nr - 1); - return sqrt(variance); - } - return 0.0; -} - -static VALUE -acc_stddev(const struct acc *acc) -{ - return DBL2NUM(acc_stddev_dbl(acc)); -} - -static struct src_loc *totals_add_rcu(struct src_loc *k) -{ - struct cds_lfht_iter iter; - struct cds_lfht_node *cur; - struct src_loc *l = 0; - struct cds_lfht *t; - -again: - t = rcu_dereference(totals); - if (!t) goto out_unlock; - cds_lfht_lookup(t, k->hval, loc_eq, k, &iter); - cur = cds_lfht_iter_get_node(&iter); - if (cur) { - l = caa_container_of(cur, struct src_loc, hnode); - uatomic_add(&l->total, k->total); - uatomic_add(&l->allocations, 1); - } else { - size_t n = loc_size(k); - l = real_malloc(sizeof(*l) + n); - if (!l) goto out_unlock; - memcpy(l, k, sizeof(*l) + n); - l->mtx = mutex_assign(); - l->age_total = 0; - l->max_lifespan = 0; - l->frees = 0; - l->allocations = 1; - CDS_INIT_LIST_HEAD(&l->allocs); - cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode); - if (cur != &l->hnode) { /* lost race */ - rcu_read_unlock(); - real_free(l); - rcu_read_lock(); - goto again; - } - } -out_unlock: - return l; -} - -static void update_stats_rcu_unlock(const struct src_loc *l) -{ - if (caa_likely(l)) rcu_read_unlock(); -} - -static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller) -{ - struct src_loc *k, *ret = 0; - static const size_t xlen = sizeof(caller); - char *dst; - - if (caa_unlikely(!totals)) return 0; - if (locating++) goto out; /* do not recurse into another *alloc */ - - uatomic_add(&total_bytes_inc, size); - - rcu_read_lock(); - if (has_ec_p()) { - int line; - const char *ptr = rb_source_location_cstr(&line); - size_t len; - size_t int_size = INT2STR_MAX; - - generation = rb_gc_count(); - - if (!ptr) goto unknown; - - /* avoid vsnprintf or anything which could call malloc here: */ - len = strlen(ptr); - k = (void *)kbuf; - k->total = size; - dst = mempcpy(k->k, ptr, len); - *dst++ = ':'; - dst = int2str(line, dst, &int_size); - if (dst) { - *dst = 0; /* terminate string */ - k->capa = (uint32_t)(dst - k->k + 1); - k->hval = jhash(k->k, k->capa, 0xdeadbeef); - ret = totals_add_rcu(k); - } else { - rb_bug("bad math making key from location %s:%d\n", - ptr, line); - } - } else { -unknown: - k = alloca(sizeof(*k) + xlen); - k->total = size; - memcpy(k->k, &caller, xlen); - k->capa = 0; - k->hval = jhash(k->k, xlen, 0xdeadbeef); - ret = totals_add_rcu(k); - } -out: - --locating; - return ret; -} - -size_t malloc_usable_size(void *p) -{ - return ptr2hdr(p)->size; -} - -static void -free_hdr_rcu(struct rcu_head *dead) -{ - struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead); - real_free(h->real); -} - -void free(void *p) -{ - if (p) { - struct alloc_hdr *h = ptr2hdr(p); - struct src_loc *l = h->as.live.loc; - - if (!real_free) return; /* oh well, leak a little */ - if (l && l != IS_HEAP_PAGE_BODY) { - size_t age = generation - h->as.live.gen; - - uatomic_add(&total_bytes_dec, h->size); - uatomic_set(&h->size, 0); - uatomic_add(&l->frees, 1); - uatomic_add(&l->age_total, age); - - mutex_lock(l->mtx); - cds_list_del_rcu(&h->anode); - if (age > l->max_lifespan) - l->max_lifespan = age; - mutex_unlock(l->mtx); - - call_rcu(&h->as.dead, free_hdr_rcu); - } else if (l == IS_HEAP_PAGE_BODY) { - size_t gen = generation; - size_t age = gen - h->as.live.gen; - - h->as.hpb_freed.at = gen; - - mutex_lock(&hpb_stats.lock); - acc_add(&hpb_stats.alive, age); - - /* hpb_stats.bodies => hpb_stats.freed */ - cds_list_move(&h->anode, &hpb_stats.freed); - - mutex_unlock(&hpb_stats.lock); - } else { - real_free(h->real); - } - } -} - -static void -alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real) -{ - /* we need src_loc to remain alive for the duration of this call */ - if (!h) return; - h->size = size; - h->real = real; - h->as.live.loc = l; - h->as.live.gen = generation; - if (l) { - mutex_lock(l->mtx); - cds_list_add_rcu(&h->anode, &l->allocs); - mutex_unlock(l->mtx); - } -} - -static size_t size_align(size_t size, size_t alignment) -{ - return ((size + (alignment - 1)) & ~(alignment - 1)); -} - -static bool ptr_is_aligned(const void *ptr, size_t alignment) -{ - return ((uintptr_t)ptr & (alignment - 1)) == 0; -} - -static void *ptr_align(void *ptr, size_t alignment) -{ - return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1)); -} - -static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; } - -static int -internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller) -{ - struct src_loc *l; - struct alloc_hdr *h; - void *real; - size_t asize; - size_t d = alignment / sizeof(void*); - size_t r = alignment % sizeof(void*); - - if (!real_malloc) return ENOMEM; - - if (r != 0 || d == 0 || !is_power_of_two(d)) - return EINVAL; - - if (alignment <= ASSUMED_MALLOC_ALIGNMENT) { - void *p = malloc(size); - if (!p) return ENOMEM; - *pp = p; - return 0; - } - for (; alignment < sizeof(struct alloc_hdr); alignment *= 2) - ; /* double alignment until >= sizeof(struct alloc_hdr) */ - if (__builtin_add_overflow(size, alignment, &asize) || - __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize)) - return ENOMEM; - - - if (alignment == HEAP_PAGE_ALIGN && size == HEAP_PAGE_SIZE) { - if (has_ec_p()) generation = rb_gc_count(); - l = IS_HEAP_PAGE_BODY; - } else { - l = update_stats_rcu_lock(size, caller); - } - - if (l == IS_HEAP_PAGE_BODY) { - void *p; - size_t gen = generation; - - mutex_lock(&hpb_stats.lock); - - /* reuse existing entry */ - if (!cds_list_empty(&hpb_stats.freed)) { - size_t deathspan; - - h = cds_list_first_entry(&hpb_stats.freed, - struct alloc_hdr, anode); - /* hpb_stats.freed => hpb_stats.bodies */ - cds_list_move(&h->anode, &hpb_stats.bodies); - assert(h->size == size); - assert(h->real); - real = h->real; - p = hdr2ptr(h); - assert(ptr_is_aligned(p, alignment)); - - deathspan = gen - h->as.hpb_freed.at; - acc_add(&hpb_stats.reborn, deathspan); - } - else { - real = real_malloc(asize); - if (!real) return ENOMEM; - - p = hdr2ptr(real); - if (!ptr_is_aligned(p, alignment)) - p = ptr_align(p, alignment); - h = ptr2hdr(p); - h->size = size; - h->real = real; - cds_list_add(&h->anode, &hpb_stats.bodies); - } - mutex_unlock(&hpb_stats.lock); - h->as.live.loc = l; - h->as.live.gen = gen; - *pp = p; - } - else { - real = real_malloc(asize); - if (real) { - void *p = hdr2ptr(real); - if (!ptr_is_aligned(p, alignment)) - p = ptr_align(p, alignment); - h = ptr2hdr(p); - alloc_insert_rcu(l, h, size, real); - update_stats_rcu_unlock(l); - *pp = p; - } - } - - return real ? 0 : ENOMEM; -} - -static void * -memalign_result(int err, void *p) -{ - if (caa_unlikely(err)) { - errno = err; - return 0; - } - return p; -} - -void *memalign(size_t alignment, size_t size) -{ - void *p; - int err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0)); - return memalign_result(err, p); -} - -int posix_memalign(void **p, size_t alignment, size_t size) -{ - return internal_memalign(p, alignment, size, RETURN_ADDRESS(0)); -} - -void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign"))); -void cfree(void *) __attribute__((alias("free"))); - -void *valloc(size_t size) -{ - void *p; - int err = internal_memalign(&p, page_size, size, RETURN_ADDRESS(0)); - return memalign_result(err, p); -} - -#if __GNUC__ < 7 -# define add_overflow_p(a,b) __extension__({ \ - __typeof__(a) _c; \ - __builtin_add_overflow(a,b,&_c); \ - }) -#else -# define add_overflow_p(a,b) \ - __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0) -#endif - -void *pvalloc(size_t size) -{ - size_t alignment = page_size; - void *p; - int err; - - if (add_overflow_p(size, alignment)) { - errno = ENOMEM; - return 0; - } - size = size_align(size, alignment); - err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0)); - return memalign_result(err, p); -} - -void *malloc(size_t size) -{ - struct src_loc *l; - struct alloc_hdr *h; - size_t asize; - void *p; - - if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) - goto enomem; - - /* - * Needed for C++ global declarations using "new", - * which happens before our constructor - */ -#ifndef __FreeBSD__ - if (!real_malloc) { - if (resolving_malloc) goto enomem; - resolving_malloc = 1; - real_malloc = dlsym(RTLD_NEXT, "malloc"); - } -#endif - l = update_stats_rcu_lock(size, RETURN_ADDRESS(0)); - p = h = real_malloc(asize); - if (h) { - alloc_insert_rcu(l, h, size, h); - p = hdr2ptr(h); - } - update_stats_rcu_unlock(l); - if (caa_unlikely(!p)) errno = ENOMEM; - return p; -enomem: - errno = ENOMEM; - return 0; -} - -void *calloc(size_t nmemb, size_t size) -{ - void *p; - struct src_loc *l; - struct alloc_hdr *h; - size_t asize; - - if (__builtin_mul_overflow(size, nmemb, &size)) { - errno = ENOMEM; - return 0; - } - if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) { - errno = ENOMEM; - return 0; - } - RETURN_IF_NOT_READY(); - l = update_stats_rcu_lock(size, RETURN_ADDRESS(0)); - p = h = real_malloc(asize); - if (p) { - alloc_insert_rcu(l, h, size, h); - p = hdr2ptr(h); - memset(p, 0, size); - } - update_stats_rcu_unlock(l); - if (caa_unlikely(!p)) errno = ENOMEM; - return p; -} - -void *realloc(void *ptr, size_t size) -{ - void *p; - struct src_loc *l; - struct alloc_hdr *h; - size_t asize; - - if (!size) { - free(ptr); - return 0; - } - if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) { - errno = ENOMEM; - return 0; - } - RETURN_IF_NOT_READY(); - - l = update_stats_rcu_lock(size, RETURN_ADDRESS(0)); - p = h = real_malloc(asize); - if (p) { - alloc_insert_rcu(l, h, size, h); - p = hdr2ptr(h); - } - update_stats_rcu_unlock(l); - - if (ptr && p) { - struct alloc_hdr *old = ptr2hdr(ptr); - memcpy(p, ptr, old->size < size ? old->size : size); - free(ptr); - } - if (caa_unlikely(!p)) errno = ENOMEM; - return p; -} - -struct dump_arg { - FILE *fp; - size_t min; -}; - -static void *dump_to_file(void *x) -{ - struct dump_arg *a = x; - struct cds_lfht_iter iter; - struct src_loc *l; - struct cds_lfht *t; - - ++locating; - rcu_read_lock(); - t = rcu_dereference(totals); - if (!t) - goto out_unlock; - cds_lfht_for_each_entry(t, &iter, l, hnode) { - const void *p = l->k; - char **s = 0; - if (l->total <= a->min) continue; - - if (loc_is_addr(l)) { - s = backtrace_symbols(p, 1); - p = s[0]; - } - fprintf(a->fp, "%16zu %12zu %s\n", - l->total, l->allocations, (const char *)p); - if (s) free(s); - } -out_unlock: - rcu_read_unlock(); - --locating; - return 0; -} - -/* - * call-seq: - * - * Mwrap.dump([[io] [, min]] -> nil - * - * Dumps the current totals to +io+ which must be an IO object - * (StringIO and similar are not supported). Total sizes smaller - * than or equal to +min+ are skipped. - * - * The output is space-delimited by 3 columns: - * - * total_size call_count location - */ -static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod) -{ - VALUE io, min; - struct dump_arg a; - rb_io_t *fptr; - - rb_scan_args(argc, argv, "02", &io, &min); - - if (NIL_P(io)) - /* library may be linked w/o Ruby */ - io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr")); - - a.min = NIL_P(min) ? 0 : NUM2SIZET(min); - io = rb_io_get_io(io); - io = rb_io_get_write_io(io); - GetOpenFile(io, fptr); - a.fp = rb_io_stdio_file(fptr); - - rb_thread_call_without_gvl(dump_to_file, &a, 0, 0); - RB_GC_GUARD(io); - return Qnil; -} - -/* The whole operation is not remotely atomic... */ -static void *totals_reset(void *ign) -{ - struct cds_lfht *t; - struct cds_lfht_iter iter; - struct src_loc *l; - - uatomic_set(&total_bytes_inc, 0); - uatomic_set(&total_bytes_dec, 0); - - rcu_read_lock(); - t = rcu_dereference(totals); - cds_lfht_for_each_entry(t, &iter, l, hnode) { - uatomic_set(&l->total, 0); - uatomic_set(&l->allocations, 0); - uatomic_set(&l->frees, 0); - uatomic_set(&l->age_total, 0); - uatomic_set(&l->max_lifespan, 0); - } - rcu_read_unlock(); - return 0; -} - -/* - * call-seq: - * - * Mwrap.reset -> nil - * - * Resets the the total tables by zero-ing all counters. - * This resets all statistics. This is not an atomic operation - * as other threads (outside of GVL) may increment counters. - */ -static VALUE mwrap_reset(VALUE mod) -{ - rb_thread_call_without_gvl(totals_reset, 0, 0, 0); - return Qnil; -} - -/* :nodoc: */ -static VALUE mwrap_clear(VALUE mod) -{ - return mwrap_reset(mod); -} - -static VALUE rcu_unlock_ensure(VALUE ignored) -{ - rcu_read_unlock(); - --locating; - return Qfalse; -} - -static VALUE location_string(struct src_loc *l) -{ - VALUE ret, tmp; - - if (loc_is_addr(l)) { - char **s = backtrace_symbols((void *)l->k, 1); - tmp = rb_str_new_cstr(s[0]); - free(s); - } - else { - tmp = rb_str_new(l->k, l->capa - 1); - } - - /* deduplicate and try to free up some memory */ - ret = rb_funcall(tmp, id_uminus, 0); - if (!OBJ_FROZEN_RAW(tmp)) - rb_str_resize(tmp, 0); - - return ret; -} - -static VALUE dump_each_rcu(VALUE x) -{ - struct dump_arg *a = (struct dump_arg *)x; - struct cds_lfht *t; - struct cds_lfht_iter iter; - struct src_loc *l; - - t = rcu_dereference(totals); - cds_lfht_for_each_entry(t, &iter, l, hnode) { - VALUE v[6]; - if (l->total <= a->min) continue; - - v[0] = location_string(l); - v[1] = SIZET2NUM(l->total); - v[2] = SIZET2NUM(l->allocations); - v[3] = SIZET2NUM(l->frees); - v[4] = SIZET2NUM(l->age_total); - v[5] = SIZET2NUM(l->max_lifespan); - - rb_yield_values2(6, v); - assert(rcu_read_ongoing()); - } - return Qnil; -} - -/* - * call-seq: - * - * Mwrap.each([min]) do |location,total,allocations,frees,age_total,max_lifespan| - * ... - * end - * - * Yields each entry of the of the table to a caller-supplied block. - * +min+ may be specified to filter out lines with +total+ bytes - * equal-to-or-smaller-than the supplied minimum. - */ -static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod) -{ - VALUE min; - struct dump_arg a; - - rb_scan_args(argc, argv, "01", &min); - a.min = NIL_P(min) ? 0 : NUM2SIZET(min); - - ++locating; - rcu_read_lock(); - - return rb_ensure(dump_each_rcu, (VALUE)&a, rcu_unlock_ensure, 0); -} - -static size_t -src_loc_memsize(const void *p) -{ - return sizeof(struct src_loc); -} - -static const rb_data_type_t src_loc_type = { - "source_location", - /* no marking, no freeing */ - { 0, 0, src_loc_memsize, /* reserved */ }, - /* parent, data, [ flags ] */ -}; - -static VALUE cSrcLoc; - -static int -extract_addr(const char *str, size_t len, void **p) -{ - const char *c; -#if defined(__GLIBC__) - return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p)); -#else /* tested FreeBSD */ - return ((c = strstr(str, "0x")) && sscanf(c, "%p", p)); -#endif -} - -/* - * call-seq: - * Mwrap[location] -> Mwrap::SourceLocation - * - * Returns the associated Mwrap::SourceLocation given the +location+ - * String. +location+ is either a Ruby source location path:line - * (e.g. "/path/to/foo.rb:5") or a hexadecimal memory address with - * square-braces part yielded by Mwrap.dump (e.g. "[0xdeadbeef]") - */ -static VALUE mwrap_aref(VALUE mod, VALUE loc) -{ - const char *str = StringValueCStr(loc); - int len = RSTRING_LENINT(loc); - struct src_loc *k = 0; - uintptr_t p; - struct cds_lfht_iter iter; - struct cds_lfht_node *cur; - struct cds_lfht *t; - struct src_loc *l; - VALUE val = Qnil; - - if (extract_addr(str, len, (void **)&p)) { - k = (void *)kbuf; - memcpy(k->k, &p, sizeof(p)); - k->capa = 0; - k->hval = jhash(k->k, sizeof(p), 0xdeadbeef); - } else { - k = (void *)kbuf; - memcpy(k->k, str, len + 1); - k->capa = len + 1; - k->hval = jhash(k->k, k->capa, 0xdeadbeef); - } - - if (!k) return val; - - rcu_read_lock(); - t = rcu_dereference(totals); - if (!t) goto out_unlock; - - cds_lfht_lookup(t, k->hval, loc_eq, k, &iter); - cur = cds_lfht_iter_get_node(&iter); - if (cur) { - l = caa_container_of(cur, struct src_loc, hnode); - val = TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, l); - } -out_unlock: - rcu_read_unlock(); - return val; -} - -static VALUE src_loc_each_i(VALUE p) -{ - struct alloc_hdr *h; - struct src_loc *l = (struct src_loc *)p; - - cds_list_for_each_entry_rcu(h, &l->allocs, anode) { - size_t gen = uatomic_read(&h->as.live.gen); - size_t size = uatomic_read(&h->size); - - if (size) { - VALUE v[2]; - v[0] = SIZET2NUM(size); - v[1] = SIZET2NUM(gen); - - rb_yield_values2(2, v); - } - } - - return Qfalse; -} - -static struct src_loc *src_loc_get(VALUE self) -{ - struct src_loc *l; - TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l); - assert(l); - return l; -} - -/* - * call-seq: - * loc = Mwrap[location] - * loc.each { |size,generation| ... } - * - * Iterates through live allocations for a given Mwrap::SourceLocation, - * yielding the +size+ (in bytes) and +generation+ of each allocation. - * The +generation+ is the value of the GC.count method at the time - * the allocation was made. - * - * This functionality is only available in mwrap 2.0.0+ - */ -static VALUE src_loc_each(VALUE self) -{ - struct src_loc *l = src_loc_get(self); - - assert(locating == 0 && "forgot to clear locating"); - ++locating; - rcu_read_lock(); - rb_ensure(src_loc_each_i, (VALUE)l, rcu_unlock_ensure, 0); - return self; -} - -/* - * The the mean lifespan (in GC generations) of allocations made from this - * location. This does not account for live allocations. - */ -static VALUE src_loc_mean_lifespan(VALUE self) -{ - struct src_loc *l = src_loc_get(self); - size_t tot, frees; - - frees = uatomic_read(&l->frees); - tot = uatomic_read(&l->age_total); - return DBL2NUM(frees ? ((double)tot/(double)frees) : HUGE_VAL); -} - -/* The number of frees made from this location */ -static VALUE src_loc_frees(VALUE self) -{ - return SIZET2NUM(uatomic_read(&src_loc_get(self)->frees)); -} - -/* The number of allocations made from this location */ -static VALUE src_loc_allocations(VALUE self) -{ - return SIZET2NUM(uatomic_read(&src_loc_get(self)->allocations)); -} - -/* The total number of bytes allocated from this location */ -static VALUE src_loc_total(VALUE self) -{ - return SIZET2NUM(uatomic_read(&src_loc_get(self)->total)); -} - -/* - * The maximum age (in GC generations) of an allocation before it was freed. - * This does not account for live allocations. - */ -static VALUE src_loc_max_lifespan(VALUE self) -{ - return SIZET2NUM(uatomic_read(&src_loc_get(self)->max_lifespan)); -} - -/* - * Returns a frozen String location of the given SourceLocation object. - */ -static VALUE src_loc_name(VALUE self) -{ - struct src_loc *l = src_loc_get(self); - VALUE ret; - - ++locating; - ret = location_string(l); - --locating; - return ret; -} - -static VALUE reset_locating(VALUE ign) { --locating; return Qfalse; } - -/* - * call-seq: - * - * Mwrap.quiet do |depth| - * # expensive sort/calculate/emitting results of Mwrap.each - * # affecting statistics of the rest of the app - * end - * - * Stops allocation tracking inside the block. This is useful for - * monitoring code which calls other Mwrap (or ObjectSpace/GC) - * functions which unavoidably allocate memory. - * - * This feature was added in mwrap 2.0.0+ - */ -static VALUE mwrap_quiet(VALUE mod) -{ - size_t cur = ++locating; - return rb_ensure(rb_yield, SIZET2NUM(cur), reset_locating, 0); -} - -static VALUE total_inc(VALUE mod) -{ - return SIZET2NUM(total_bytes_inc); -} - -static VALUE total_dec(VALUE mod) -{ - return SIZET2NUM(total_bytes_dec); -} - -static VALUE hpb_each_yield(VALUE ignore) -{ - struct alloc_hdr *h, *next; - - cds_list_for_each_entry_safe(h, next, &hpb_stats.bodies, anode) { - VALUE v[2]; /* [ generation, address ] */ - void *addr = hdr2ptr(h); - assert(ptr_is_aligned(addr, HEAP_PAGE_ALIGN)); - v[0] = LONG2NUM((long)addr); - v[1] = SIZET2NUM(h->as.live.gen); - rb_yield_values2(2, v); - } - return Qnil; -} - -/* - * call-seq: - * - * Mwrap::HeapPageBody.each { |gen, addr| } -> Integer - * - * Yields the generation (GC.count) the heap page body was created - * and address of the heap page body as an Integer. Returns the - * number of allocated pages as an Integer. This return value should - * match the result of GC.stat(:heap_allocated_pages) - */ -static VALUE hpb_each(VALUE mod) -{ - ++locating; - return rb_ensure(hpb_each_yield, Qfalse, reset_locating, 0); -} - -/* - * call-seq: - * - * Mwrap::HeapPageBody.stat -> Hash - * Mwrap::HeapPageBody.stat(hash) -> hash - * - * The maximum lifespan of a heap page body in the Ruby VM. - * This may be Infinity if no heap page bodies were ever freed. - */ -static VALUE hpb_stat(int argc, VALUE *argv, VALUE hpb) -{ - VALUE h; - - rb_scan_args(argc, argv, "01", &h); - if (NIL_P(h)) - h = rb_hash_new(); - else if (!RB_TYPE_P(h, T_HASH)) - rb_raise(rb_eTypeError, "not a hash %+"PRIsVALUE, h); - - ++locating; -#define S(x) ID2SYM(rb_intern(#x)) - rb_hash_aset(h, S(lifespan_max), acc_max(&hpb_stats.alive)); - rb_hash_aset(h, S(lifespan_min), acc_min(&hpb_stats.alive)); - rb_hash_aset(h, S(lifespan_mean), acc_mean(&hpb_stats.alive)); - rb_hash_aset(h, S(lifespan_stddev), acc_stddev(&hpb_stats.alive)); - rb_hash_aset(h, S(deathspan_max), acc_max(&hpb_stats.reborn)); - rb_hash_aset(h, S(deathspan_min), acc_min(&hpb_stats.reborn)); - rb_hash_aset(h, S(deathspan_mean), acc_mean(&hpb_stats.reborn)); - rb_hash_aset(h, S(deathspan_stddev), acc_stddev(&hpb_stats.reborn)); - rb_hash_aset(h, S(resurrects), SIZET2NUM(hpb_stats.reborn.nr)); -#undef S - --locating; - - return h; -} - -/* - * Document-module: Mwrap - * - * require 'mwrap' - * - * Mwrap has a dual function as both a Ruby C extension and LD_PRELOAD - * wrapper. As a Ruby C extension, it exposes a limited Ruby API. - * To be effective at gathering status, mwrap must be loaded as a - * LD_PRELOAD (using the mwrap(1) executable makes it easy) - * - * ENVIRONMENT - * - * The "MWRAP" environment variable contains a comma-delimited list - * of key:value options for automatically dumping at program exit. - * - * * dump_fd: a writable FD to dump to - * * dump_path: a path to dump to, the file is opened in O_APPEND mode - * * dump_min: the minimum allocation size (total) to dump - * * dump_heap: mask of heap_page_body statistics to dump - * - * If both `dump_fd' and `dump_path' are specified, dump_path takes - * precedence. - * - * dump_heap bitmask - * * 0x01 - summary stats (same info as HeapPageBody.stat) - * * 0x02 - all live heaps (similar to HeapPageBody.each) - * * 0x04 - skip non-heap_page_body-related output - */ -void Init_mwrap(void) -{ - VALUE mod, hpb; - - ++locating; - mod = rb_define_module("Mwrap"); - id_uminus = rb_intern("-@"); - - /* - * Represents a location in source code or library - * address which calls a memory allocation. It is - * updated automatically as allocations are made, so - * there is no need to reload or reread it from Mwrap#[]. - * This class is only available since mwrap 2.0.0+. - */ - cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject); - rb_define_singleton_method(mod, "dump", mwrap_dump, -1); - rb_define_singleton_method(mod, "reset", mwrap_reset, 0); - rb_define_singleton_method(mod, "clear", mwrap_clear, 0); - rb_define_singleton_method(mod, "each", mwrap_each, -1); - rb_define_singleton_method(mod, "[]", mwrap_aref, 1); - rb_define_singleton_method(mod, "quiet", mwrap_quiet, 0); - rb_define_singleton_method(mod, "total_bytes_allocated", total_inc, 0); - rb_define_singleton_method(mod, "total_bytes_freed", total_dec, 0); - - - rb_define_method(cSrcLoc, "each", src_loc_each, 0); - rb_define_method(cSrcLoc, "frees", src_loc_frees, 0); - rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0); - rb_define_method(cSrcLoc, "total", src_loc_total, 0); - rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0); - rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0); - rb_define_method(cSrcLoc, "name", src_loc_name, 0); - - /* - * Information about "struct heap_page_body" allocations from - * Ruby gc.c. This can be useful for tracking fragmentation - * from posix_memalign(3) use in mainline Ruby: - * - * https://sourceware.org/bugzilla/show_bug.cgi?id=14581 - */ - hpb = rb_define_class_under(mod, "HeapPageBody", rb_cObject); - rb_define_singleton_method(hpb, "stat", hpb_stat, -1); - rb_define_singleton_method(hpb, "each", hpb_each, 0); - - --locating; -} - -enum { - DUMP_HPB_STATS = 0x1, - DUMP_HPB_EACH = 0x2, - DUMP_HPB_EXCL = 0x4, -}; - -static void dump_hpb(FILE *fp, unsigned flags) -{ - if (flags & DUMP_HPB_STATS) { - fprintf(fp, - "lifespan_max: %zu\n" - "lifespan_min:%s%zu\n" - "lifespan_mean: %0.3f\n" - "lifespan_stddev: %0.3f\n" - "deathspan_max: %zu\n" - "deathspan_min:%s%zu\n" - "deathspan_mean: %0.3f\n" - "deathspan_stddev: %0.3f\n" - "gc_count: %zu\n", - hpb_stats.alive.max, - hpb_stats.alive.min == INT64_MAX ? " -" : " ", - hpb_stats.alive.min, - hpb_stats.alive.mean, - acc_stddev_dbl(&hpb_stats.alive), - hpb_stats.reborn.max, - hpb_stats.reborn.min == INT64_MAX ? " -" : " ", - hpb_stats.reborn.min, - hpb_stats.reborn.mean, - acc_stddev_dbl(&hpb_stats.reborn), - /* n.b.: unsafe to call rb_gc_count() in destructor */ - generation); - } - if (flags & DUMP_HPB_EACH) { - struct alloc_hdr *h; - - cds_list_for_each_entry(h, &hpb_stats.bodies, anode) { - void *addr = hdr2ptr(h); - - fprintf(fp, "%p\t%zu\n", addr, h->as.live.gen); - } - } -} - -/* rb_cloexec_open isn't usable by non-Ruby processes */ -#ifndef O_CLOEXEC -# define O_CLOEXEC 0 -#endif - -__attribute__ ((destructor)) -static void mwrap_dump_destructor(void) -{ - const char *opt = getenv("MWRAP"); - const char *modes[] = { "a", "a+", "w", "w+", "r+" }; - struct dump_arg a = { .min = 0 }; - size_t i; - int dump_fd; - unsigned dump_heap = 0; - char *dump_path; - char *s; - - if (!opt) - return; - - ++locating; - if ((dump_path = strstr(opt, "dump_path:")) && - (dump_path += sizeof("dump_path")) && - *dump_path) { - char *end = strchr(dump_path, ','); - if (end) { - char *tmp = alloca(end - dump_path + 1); - end = mempcpy(tmp, dump_path, end - dump_path); - *end = 0; - dump_path = tmp; - } - dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT, - 0666); - if (dump_fd < 0) { - fprintf(stderr, "open %s failed: %s\n", dump_path, - strerror(errno)); - goto out; - } - } - else if (!sscanf(opt, "dump_fd:%d", &dump_fd)) - goto out; - - if ((s = strstr(opt, "dump_min:"))) - sscanf(s, "dump_min:%zu", &a.min); - - if ((s = strstr(opt, "dump_heap:"))) - sscanf(s, "dump_heap:%u", &dump_heap); - - switch (dump_fd) { - case 0: goto out; - case 1: a.fp = stdout; break; - case 2: a.fp = stderr; break; - default: - if (dump_fd < 0) - goto out; - a.fp = 0; - - for (i = 0; !a.fp && i < 5; i++) - a.fp = fdopen(dump_fd, modes[i]); - - if (!a.fp) { - fprintf(stderr, "failed to open fd=%d: %s\n", - dump_fd, strerror(errno)); - goto out; - } - /* we'll leak some memory here, but this is a destructor */ - } - if ((dump_heap & DUMP_HPB_EXCL) == 0) - dump_to_file(&a); - dump_hpb(a.fp, dump_heap); -out: - --locating; -} diff --git a/ext/mwrap/jhash.h b/jhash.h similarity index 100% rename from ext/mwrap/jhash.h rename to jhash.h diff --git a/lib/Devel/Mwrap.pm b/lib/Devel/Mwrap.pm new file mode 100644 index 0000000..f74f7d1 --- /dev/null +++ b/lib/Devel/Mwrap.pm @@ -0,0 +1,15 @@ +# Copyright (C) 2019 all contributors +# License: GPL-2.0+ +package Devel::Mwrap; +use strict; +our $VERSION = '0.0.0'; +use XSLoader; +XSLoader::load(__PACKAGE__, $VERSION); + +1; +__END__ +=pod + +=head1 NAME + +Devel::Mwrap - LD_PRELOAD malloc wrapper + line stats for Perl diff --git a/lib/mwrap_rack.rb b/lib/mwrap_rack.rb deleted file mode 100644 index e45b26d..0000000 --- a/lib/mwrap_rack.rb +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (C) 2018 all contributors -# License: GPL-2.0+ -# frozen_string_literal: true -require 'mwrap' -require 'rack' -require 'cgi' - -# MwrapRack is a standalone Rack application which can be -# mounted to run within your application process. -# -# Using the Rack::Builder API in config.ru, you can map it to -# the "/MWRAP/" endpoint. As with the rest of the Mwrap API, -# your Rack server needs to be spawned with the mwrap(1) -# wrapper to enable the LD_PRELOAD. -# -# require 'mwrap_rack' -# map('/MWRAP') { run(MwrapRack.new) } -# map('/') { run(your_normal_app) } -# -# A live demo is available at https://80x24.org/MWRAP/ -# (warning the demo machine is 32-bit, so counters will overflow) -# -# This module is only available in mwrap 2.0.0+ -class MwrapRack - module HtmlResponse # :nodoc: - def response - [ 200, { - 'Expires' => 'Fri, 01 Jan 1980 00:00:00 GMT', - 'Pragma' => 'no-cache', - 'Cache-Control' => 'no-cache, max-age=0, must-revalidate', - 'Content-Type' => 'text/html; charset=UTF-8', - }, self ] - end - end - - class Each < Struct.new(:script_name, :min, :sort) # :nodoc: - include HtmlResponse - HEADER = '' + %w(total allocations frees mean_life max_life - location).join('') + '' - FIELDS = %w(total allocations frees mean_life max_life location) - def each - Mwrap.quiet do - t = -"Mwrap.each(#{min})" - sn = script_name - all = [] - f = FIELDS.dup - sc = FIELDS.index(sort || 'total') || 0 - f[sc] = -"#{f[sc]}" - f.map! do |hdr| - if hdr.start_with?('') - hdr - else - -%Q(#{hdr}) - end - end - Mwrap.each(min) do |loc, total, allocations, frees, age_sum, max_life| - mean_life = frees == 0 ? Float::INFINITY : age_sum/frees.to_f - all << [total,allocations,frees,mean_life,max_life,loc] - end - all.sort_by! { |cols| -cols[sc] } - - yield(-"#{t}" \ - "

#{t}

\n" \ - "

Current generation: #{GC.count}

\n\n" \ - "\n") - all.each do |cols| - loc = cols.pop - cols[3] = sprintf('%0.3f', cols[3]) # mean_life - href = -(+"#{sn}/at/#{CGI.escape(loc)}").encode!(xml: :attr) - yield(%Q(\n)) - cols.clear - end.clear - yield "
#{f.join('')}
#{cols.join('')}#{-loc.encode(xml: :text)}
\n" - end - end - end - - class EachAt < Struct.new(:loc) # :nodoc: - include HtmlResponse - HEADER = 'sizegeneration' - - def each - t = loc.name.encode(xml: :text) - yield(-"#{t}" \ - "

live allocations at #{t}

" \ - "

Current generation: #{GC.count}

\n#{HEADER}") - loc.each do |size, generation| - yield("\n") - end - yield "
#{size}#{generation}
\n" - end - end - - class HeapPages # :nodoc: - include HtmlResponse - HEADER = 'addressgeneration' - - def hpb_rows - Mwrap::HeapPageBody.stat(stat = Thread.current[:mwrap_hpb_stat] ||= {}) - %i(lifespan_max lifespan_min lifespan_mean lifespan_stddev - deathspan_max deathspan_min deathspan_mean deathspan_stddev - resurrects - ).map! do |k| - "#{k}#{stat[k]}\n" - end.join - end - - def gc_stat_rows - GC.stat(stat = Thread.current[:mwrap_gc_stat] ||= {}) - %i(count heap_allocated_pages heap_eden_pages heap_tomb_pages - total_allocated_pages total_freed_pages).map do |k| - "GC.stat(:#{k})#{stat[k]}\n" - end.join - end - - GC_STAT_URL = 'https://docs.ruby-lang.org/en/trunk/GC.html#method-c-stat' - GC_STAT_HELP = <<~"" -

Non-Infinity lifespans can indicate fragmentation. -

See #{GC_STAT_URL} for info on GC.stat values. - - def each - Mwrap.quiet do - yield("heap pages" \ - "

heap pages

" \ - "\n" \ - "#{hpb_rows}" \ - "#{gc_stat_rows}" \ - "
statvalue
\n" \ - "#{GC_STAT_HELP}" \ - "#{HEADER}") - Mwrap::HeapPageBody.each do |addr, generation| - addr = -sprintf('0x%x', addr) - yield(-"\n") - end - yield "
#{addr}#{generation}
\n" - end - end - end - - def r404 # :nodoc: - [404,{'Content-Type'=>'text/plain'},["Not found\n"]] - end - - # The standard Rack application endpoint for MwrapRack - def call(env) - case env['PATH_INFO'] - when %r{\A/each/(\d+)\z} - min = $1.to_i - m = env['QUERY_STRING'].match(/\bsort=(\w+)/) - Each.new(env['SCRIPT_NAME'], min, m ? m[1] : nil).response - when %r{\A/at/(.*)\z} - loc = -CGI.unescape($1) - loc = Mwrap[loc] or return r404 - EachAt.new(loc).response - when '/heap_pages' - HeapPages.new.response - when '/' - n = 2000 - u = 'https://80x24.org/mwrap/README.html' - b = -('Mwrap demo' \ - "

allocations >#{n} bytes" \ - "

#{u}" \ - "

heap pages" \ - "\n") - [ 200, {'Content-Type'=>'text/html','Content-Length'=>-b.size.to_s},[b]] - else - r404 - end - end -end diff --git a/mwrap.gemspec b/mwrap.gemspec deleted file mode 100644 index 2c01a68..0000000 --- a/mwrap.gemspec +++ /dev/null @@ -1,32 +0,0 @@ -git_manifest = `git ls-files 2>/dev/null`.split("\n") -manifest = File.exist?('MANIFEST') ? - File.readlines('MANIFEST').map!(&:chomp).delete_if(&:empty?) : git_manifest -if git_manifest[0] && manifest != git_manifest - tmp = "MANIFEST.#$$.tmp" - File.open(tmp, 'w') { |fp| fp.puts(git_manifest.join("\n")) } - File.rename(tmp, 'MANIFEST') - system('git add MANIFEST') -end - -desc = `git describe --abbrev=4 HEAD`.strip.tr('-', '.').delete_prefix('v') - -Gem::Specification.new do |s| - s.name = 'mwrap' - s.version = desc.empty? ? '2.0.0' : desc - s.homepage = 'https://80x24.org/mwrap/' - s.authors = ["Ruby hackers"] - s.summary = 'LD_PRELOAD malloc wrapper for Ruby' - s.executables = %w(mwrap) - s.files = manifest - s.description = <<~EOF -mwrap wraps all malloc, calloc, and realloc calls to trace the Ruby -source location of such calls and bytes allocated at each callsite. - EOF - s.email = %q{e@80x24.org} - s.test_files = Dir['test/test_*.rb'] - s.extensions = %w(ext/mwrap/extconf.rb) - - s.add_development_dependency('test-unit', '~> 3.0') - s.add_development_dependency('rake-compiler', '~> 1.0') - s.licenses = %w(GPL-2.0+) -end diff --git a/script/mwrap-perl b/script/mwrap-perl new file mode 100644 index 0000000..5e5eec4 --- /dev/null +++ b/script/mwrap-perl @@ -0,0 +1,34 @@ +#!/usr/bin/perl -w +# Copyright (C) 2019 mwrap hackers +# License: GPL-2.0+ +use strict; +use Devel::Mwrap; +my $so; +if ($^O eq 'linux') { + my $maps = do { + open my $fh, '<', "/proc/$$/maps" or + die "/proc/$$/maps not accessible: $!\n"; + local $/; + <$fh>; + }; + if ($maps =~ m![ \t](/[^\n]+?/Mwrap\.so)$!sm) { + $so = $1; + } else { + die "Mwrap.so not found in: $so\n"; + } +} else { + die "unsupported OS ($^O ne 'linux')"; +} +my $cur = $ENV{LD_PRELOAD}; +if (defined $cur) { + my @cur = split(/[: \t]+/, $cur); + my %cur = map { $_ => 1 } @cur; + if (!$cur{$so}) { + # drop old redundant versions + my @keep = grep(!m!/Mwrap\.so\$!, @cur); + $ENV{LD_PRELOAD} = join(':', $so, @keep); + } +} else { + $ENV{LD_PRELOAD} = $so; +} +exec @ARGV; diff --git a/t/mwrap.t b/t/mwrap.t new file mode 100644 index 0000000..5bcc285 --- /dev/null +++ b/t/mwrap.t @@ -0,0 +1,85 @@ +#!perl -w +# Copyright (C) 2019 mwrap hackers +# License: GPL-2.0+ +use strict; +use Test::More; +use File::Temp qw(tempdir); +use_ok 'Devel::Mwrap'; + +my $tmpdir = tempdir('mwrap-perl-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $dump = "$tmpdir/dump"; +my $out = "$tmpdir/out"; +my $err = "$tmpdir/err"; +my $src = slurp('blib/script/mwrap-perl'); + +{ + my $env = { MWRAP => "dump_path:$dump,dump_min:10000" }; + my $nr = 1000; + mwrap_run('dump test', $env, '-e', '$x = "hello world" x '.$nr); + ok(-s $dump, "dump file written to"); + my $s = slurp($dump); + my $re = qr/([0-9]+)[ \t]+([0-9]+)[ \t]+-e:1[ \t]*\n/sm; + my ($bytes, $n); + if ($s =~ $re) { + ($bytes, $n) = ($1, $2); + ok($bytes >= (length('hello world') * $nr), + "counted 'hello world' x $nr"); + ok($n >= 1, 'allocation counted'); + } else { + fail("$s failed to match $re"); + } +} + +SKIP: { # C++ program which uses malloc via "new" + my $exp = `cmake -h`; + skip 'cmake missing', 2 if $?; + skip "`cmake -h' gave no output", 2 unless $exp =~ /\S/s; + open my $truncate, '>', $dump or die; + close $truncate or die; + my $env = { MWRAP => "dump_path:$dump" }; + mwrap_run('cmake (C++ new)', $env, '-e', + 'system(qw(cmake -h)); exit $?'); + my $res = slurp($out); + is($res, $exp, "`cmake -h' works"); +}; + +{ + my $env = { MWRAP => "dump_path:$dump" }; + mwrap_run('total_bytes*', $env, '-e', <<'E1'); +my $A = Devel::Mwrap::total_bytes_allocated(); +my $f = Devel::Mwrap::total_bytes_freed(); +print("$A - $f\n"); +E1 + my $o = slurp($out); + like($o, qr/^([0-9]+) - ([0-9]+)\n/s, 'got allocated & freed bytes'); +} + +{ + my $env = { MWRAP => "dump_path:$dump" }; + mwrap_run('source location', $env, 't/source_location.perl'); +} + +done_testing(); + +sub slurp { + open my $fh, '<', $_[0] or die "open($_[0]): $!"; + local $/; + <$fh>; +} + +sub mwrap_run { + my ($msg, $env, @args) = @_; + my $pid = fork; + if ($pid == 0) { + while (my ($k, $v) = each %$env) { + $ENV{$k} = $v; + } + open STDERR, '>', $err or die "open: $!"; + open STDOUT, '>', $out or die "open: $!"; + @ARGV = ($^X, '-MDevel::Mwrap', @args); + eval $src; + die "fail: $! ($@)"; + } + waitpid($pid, 0); + is($?, 0, $msg); +} diff --git a/t/source_location.perl b/t/source_location.perl new file mode 100644 index 0000000..ed81ed8 --- /dev/null +++ b/t/source_location.perl @@ -0,0 +1,9 @@ +use Devel::Mwrap; +my $foo = ('hello world' x 10000); +my $k = __FILE__ . ":2"; +my $loc = Devel::Mwrap::get($k) or die; +$loc->name eq $k or die; +$loc->total >= 10000 or die; +$loc->allocations >= 1 or die; +$loc->frees >= 0 or die; +exit 0; diff --git a/test/test_mwrap.rb b/test/test_mwrap.rb deleted file mode 100644 index 48fba23..0000000 --- a/test/test_mwrap.rb +++ /dev/null @@ -1,322 +0,0 @@ -# frozen_string_literal: true -# Copyright (C) 2018 mwrap hackers -# License: GPL-2.0+ -require 'test/unit' -require 'mwrap' -require 'rbconfig' -require 'tempfile' - -class TestMwrap < Test::Unit::TestCase - RB = "#{RbConfig::CONFIG['bindir']}/#{RbConfig::CONFIG['RUBY_INSTALL_NAME']}" - - mwrap_so = $".grep(%r{/mwrap\.so\z})[0] - env = ENV.to_hash - cur = env['LD_PRELOAD'] - env['LD_PRELOAD'] = cur ? "#{mwrap_so}:#{cur}".freeze : mwrap_so - @@env = env.freeze - inc = File.dirname(mwrap_so) - @@cmd = %W(#{RB} -w --disable=gems -I#{inc} -rmwrap).freeze - - def test_mwrap_preload - cmd = @@cmd + %w( - -e ("helloworld"*1000).clear - -e Mwrap.dump - ) - Tempfile.create('junk') do |tmp| - tmp.sync = true - res = system(@@env, *cmd, err: tmp) - assert res, $?.inspect - tmp.rewind - lines = tmp.readlines - line_1 = lines.grep(/\s-e:1\b/)[0].strip - assert_equal '10001', line_1.split(/\s+/)[0] - end - end - - def test_dump_via_destructor - env = @@env.dup - env['MWRAP'] = 'dump_fd:5' - cmd = @@cmd + %w(-e ("0"*10000).clear) - Tempfile.create('junk') do |tmp| - tmp.sync = true - res = system(env, *cmd, { 5 => tmp }) - assert res, $?.inspect - tmp.rewind - assert_match(/\b10001\s+1\s+-e:1$/, tmp.read) - - env['MWRAP'] = 'dump_fd:1,dump_min:10000' - tmp.rewind - tmp.truncate(0) - res = system(env, *cmd, { 1 => tmp }) - assert res, $?.inspect - tmp.rewind - assert_match(/\b10001\s+1\s+-e:1$/, tmp.read) - - tmp.rewind - tmp.truncate(0) - env['MWRAP'] = "dump_path:#{tmp.path},dump_min:10000" - res = system(env, *cmd) - assert res, $?.inspect - assert_match(/\b10001\s+1\s+-e:1$/, tmp.read) - - tmp.rewind - tmp.truncate(0) - env['MWRAP'] = "dump_path:#{tmp.path},dump_heap:5" - res = system(env, *cmd) - assert res, $?.inspect - assert_match %r{lifespan_stddev}, tmp.read - end - end - - def test_cmake - begin - exp = `cmake -h` - rescue Errno::ENOENT - warn 'cmake missing' - return - end - assert_not_predicate exp.strip, :empty? - env = @@env.merge('MWRAP' => 'dump_fd:1') - out = IO.popen(env, %w(cmake -h), &:read) - assert out.start_with?(exp), 'original help exists' - assert_not_equal exp, out, 'includes dump output' - dump = out.delete_prefix(exp) - assert_match(/\b0x[a-f0-9]+\b/s, dump, 'dump output has addresses') - end - - def test_clear - cmd = @@cmd + %w( - -e ("0"*10000).clear - -e Mwrap.clear - -e ("0"*20000).clear - -e Mwrap.dump($stdout,9999) - ) - Tempfile.create('junk') do |tmp| - tmp.sync = true - res = system(@@env, *cmd, { 1 => tmp }) - assert res, $?.inspect - tmp.rewind - buf = tmp.read - assert_not_match(/\s+-e:1$/, buf) - assert_match(/\b20001\s+1\s+-e:3$/, buf) - end - end - - # make sure we don't break commands spawned by an mwrap-ed Ruby process: - def test_non_ruby_exec - IO.pipe do |r, w| - th = Thread.new { r.read } - Tempfile.create('junk') do |tmp| - tmp.sync = true - env = @@env.merge('MWRAP' => "dump_path:#{tmp.path}") - cmd = %w(perl -e print("HELLO_WORLD")) - res = system(env, *cmd, out: w) - w.close - assert res, $?.inspect - assert_match(/0x[a-f0-9]+\b/, tmp.read) - end - assert_equal "HELLO_WORLD", th.value - end - end - - # some URCU flavors use USR1, ensure the one we choose does not - def test_sigusr1_works - cmd = @@cmd + %w( - -e STDOUT.sync=true - -e trap(:USR1){p("HELLO_WORLD")} - -e END{Mwrap.dump} - -e puts -e STDIN.read) - IO.pipe do |r, w| - IO.pipe do |r2, w2| - pid = spawn(@@env, *cmd, in: r2, out: w, err: '/dev/null') - r2.close - w.close - assert_equal "\n", r.gets - buf = +'' - 10.times { Process.kill(:USR1, pid) } - while IO.select([r], nil, nil, 0.1) - case tmp = r.read_nonblock(1000, exception: false) - when String - buf << tmp - end - end - w2.close - Process.wait(pid) - assert_predicate $?, :success?, $?.inspect - assert_equal(["\"HELLO_WORLD\"\n"], buf.split(/^/).uniq) - end - end - end - - def test_reset - assert_nil Mwrap.reset - end - - def test_each - cmd = @@cmd + %w( - -e ("0"*10000).clear - -e h={} - -e Mwrap.each(1000){|a,b,c|h[a]=[b,c]} - -e puts(Marshal.dump(h)) - ) - r = IO.popen(@@env, cmd, 'r') - h = Marshal.load(r.read) - assert_not_predicate h, :empty? - h.each_key { |k| assert_kind_of String, k } - h.each_value do |total,calls| - assert_operator total, :>, 0 - assert_operator calls, :>, 0 - assert_operator total, :>=, calls - end - end - - def test_aref_each - cmd = @@cmd + %w( - -e count=GC.count - -e GC.disable - -e keep=("0"*10000) - -e loc=Mwrap["-e:3"] - -e loc.each{|size,gen|p([size,gen,count])} - ) - buf = IO.popen(@@env, cmd, &:read) - assert_predicate $?, :success? - assert_match(/\A\[\s*\d+,\s*\d+,\s*\d+\]\s*\z/s, buf) - size, gen, count = eval(buf) - assert_operator size, :>=, 10000 - assert_operator gen, :>=, count - - cmd = @@cmd + %w( - -e count=GC.count - -e locs="" - -e Mwrap.each(1){|loc,tot,calls|locs<= 10000 or abort 'SourceLocation#total broken' - loc.frees == 0 or abort 'SourceLocation#frees broken' - loc.allocations == 1 or abort 'SourceLocation#allocations broken' - seen = false - loc.each do |*x| seen = x end - seen[1] == loc.total or 'SourceLocation#each broken' - foo.clear - - # wait for call_rcu to perform real_free - freed = false - until freed - freed = true - loc.each do freed = false end - end - loc.frees == 1 or abort 'SourceLocation#frees broken (after free)' - Float === loc.mean_lifespan or abort 'mean_lifespan broken' - Integer === loc.max_lifespan or abort 'max_lifespan broken' - - addr = false - Mwrap.each do |a,| - if a =~ /0x[a-f0-9]+/ - addr = a - break - end - end - addr && addr.frozen? or abort 'Mwrap.each returned unfrozen address' - loc = Mwrap[addr] or abort "Mwrap[#{addr}] broken" - addr == loc.name or abort 'SourceLocation#name works on address' - loc.name.frozen? or abort 'SourceLocation#name not frozen' - end; - end - - def test_quiet - assert_separately(+"#{<<~"begin;"}\n#{<<~'end;'}") - begin; - require 'mwrap' - before = __LINE__ - res = Mwrap.quiet do |depth| - depth == 1 or abort 'depth is not 1' - ('a' * 10000).clear - Mwrap.quiet { |d| d == 2 or abort 'depth is not 2' } - :foo - end - after = __LINE__ - 1 - (before..after).each do |lineno| - Mwrap["#{__FILE__}:#{lineno}"] and - abort "unexpectedly tracked allocation at line #{lineno}" - end - res == :foo or abort 'Mwrap.quiet did not return block result' - end; - end - - def test_total_bytes - assert_separately(+"#{<<~"begin;"}\n#{<<~'end;'}") - begin; - require 'mwrap' - Mwrap.total_bytes_allocated > 0 or abort 'nothing allocated' - Mwrap.total_bytes_freed > 0 or abort 'nothing freed' - Mwrap.total_bytes_allocated > Mwrap.total_bytes_freed or - abort 'freed more than allocated' - end; - end - - def test_heap_page_body - assert_separately(+"#{<<~"begin;"}\n#{<<~'end;'}") - begin; - require 'mwrap' - require 'rubygems' # use up some memory - ap = GC.stat(:heap_allocated_pages) - h = {} - nr = 0 - Mwrap::HeapPageBody.each do |addr, gen| - nr += 1 - gen <= GC.count && gen >= 0 or abort "bad generation: #{gen}" - (0 == (addr & 16383)) or abort "addr not aligned: #{'%x' % addr}" - end - nr == ap or abort 'HeapPageBody.each missed page' - 10.times { (1..20000).to_a.map(&:to_s) } - 3.times { GC.start } - Mwrap::HeapPageBody.stat(h) - Integer === h[:lifespan_max] or abort 'lifespan_max not recorded' - Integer === h[:lifespan_min] or abort 'lifespan_min not recorded' - Float === h[:lifespan_mean] or abort 'lifespan_mean not recorded' - 3.times { GC.start } - 10.times { (1..20000).to_a.map(&:to_s) } - Mwrap::HeapPageBody.stat(h) - h[:deathspan_min] <= h[:deathspan_max] or - abort 'wrong min/max deathtime' - Float === h[:deathspan_mean] or abort 'deathspan_mean not recorded' - end; - end -end diff --git a/typemap b/typemap new file mode 100644 index 0000000..9531289 --- /dev/null +++ b/typemap @@ -0,0 +1,4 @@ +TYPEMAP +size_t T_UV +const char * T_PV +Devel::Mwrap::SrcLoc T_PTROBJ