From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 526CD1F4C1 for ; Thu, 21 Nov 2019 23:04:09 +0000 (UTC) From: ew To: mwrap-perl@80x24.org Subject: [PATCH] implement "age" concept Date: Thu, 21 Nov 2019 23:04:09 +0000 Message-Id: <20191121230409.2010-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Since Perl5 doesn't have a GC like Ruby and the concept of GC generations, we'll instead rely on the total bytes allocated in the process at the time of allocation so allocations can have a relative (and high-granularity) age to compare each other against. --- Mwrap.xs | 76 +++++++++++++++++++++++++++++++++++++++++++++---------- t/mwrap.t | 14 ++++++++++ typemap | 1 + 3 files changed, 77 insertions(+), 14 deletions(-) diff --git a/Mwrap.xs b/Mwrap.xs index 6d3c6d2..ed51be4 100644 --- a/Mwrap.xs +++ b/Mwrap.xs @@ -26,6 +26,12 @@ #include #include "jhash.h" +/* + * Perl doesn't have a GC the same way (C) Ruby does, so no GC count. + * Instead, the relative age of an object is the number of total bytes + * allocated (and we don't care about overflow on 32-bit since + * hardly anybody still uses it). + */ static size_t total_bytes_inc, total_bytes_dec; extern pthread_key_t __attribute__((weak)) PL_thr_key; @@ -194,6 +200,8 @@ struct src_loc { size_t total; size_t allocations; size_t frees; + size_t age_total; /* (age_total / frees) => mean age at free */ + size_t max_lifespan; struct cds_lfht_node hnode; struct cds_list_head allocs; /* <=> alloc_hdr.node */ uint32_t hval; @@ -215,6 +223,7 @@ struct alloc_hdr { struct cds_list_head anode; /* <=> src_loc.allocs */ union { struct { + size_t gen; /* global age */ struct src_loc *loc; } live; struct rcu_head dead; @@ -281,6 +290,8 @@ again: if (!l) goto out_unlock; memcpy(l, k, sizeof(*l) + n); l->mtx = mutex_assign(); + l->age_total = 0; + l->max_lifespan = 0; l->frees = 0; l->allocations = 1; CDS_INIT_LIST_HEAD(&l->allocs); @@ -301,17 +312,18 @@ static void update_stats_rcu_unlock(const struct src_loc *l) if (caa_likely(l)) rcu_read_unlock(); } -static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller) +static struct src_loc * +update_stats_rcu_lock(size_t *generation, size_t size, uintptr_t caller) { - static const size_t xlen = sizeof(caller); struct src_loc *k, *ret = 0; + static const size_t xlen = sizeof(caller); char *dst; const COP *cop; if (caa_unlikely(!totals)) return 0; if (locating++) goto out; /* do not recurse into another *alloc */ - uatomic_add(&total_bytes_inc, size); + *generation = uatomic_add_return(&total_bytes_inc, size); cop = PL_curcop; rcu_read_lock(); @@ -379,12 +391,17 @@ void free(void *p) if (!real_free) return; /* oh well, leak a little */ if (l) { + size_t current_bytes = uatomic_read(&total_bytes_inc); + size_t age = current_bytes - h->as.live.gen; uatomic_add(&total_bytes_dec, h->size); uatomic_set(&h->size, 0); uatomic_add(&l->frees, 1); + uatomic_add(&l->age_total, age); mutex_lock(l->mtx); cds_list_del_rcu(&h->anode); + if (age > l->max_lifespan) + l->max_lifespan = age; mutex_unlock(l->mtx); call_rcu(&h->as.dead, free_hdr_rcu); @@ -395,13 +412,15 @@ void free(void *p) } static void -alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real) +alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, + void *real, size_t generation) { /* we need src_loc to remain alive for the duration of this call */ if (!h) return; h->size = size; h->real = real; h->as.live.loc = l; + h->as.live.gen = generation; if (l) { mutex_lock(l->mtx); cds_list_add_rcu(&h->anode, &l->allocs); @@ -433,6 +452,7 @@ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller) struct alloc_hdr *h; void *real; size_t asize; + size_t generation = 0; size_t d = alignment / sizeof(void*); size_t r = alignment % sizeof(void*); @@ -453,7 +473,7 @@ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller) __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize)) return ENOMEM; - l = update_stats_rcu_lock(size, caller); + l = update_stats_rcu_lock(&generation, size, caller); real = real_malloc(asize); if (real) { @@ -461,7 +481,7 @@ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller) if (!ptr_is_aligned(p, alignment)) p = ptr_align(p, alignment); h = ptr2hdr(p); - alloc_insert_rcu(l, h, size, real); + alloc_insert_rcu(l, h, size, real, generation); *pp = p; } update_stats_rcu_unlock(l); @@ -530,6 +550,7 @@ void *malloc(size_t size) struct alloc_hdr *h; size_t asize; void *p; + size_t generation = 0; if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) goto enomem; @@ -545,10 +566,10 @@ void *malloc(size_t size) real_malloc = dlsym(RTLD_NEXT, "malloc"); } #endif - l = update_stats_rcu_lock(size, RETURN_ADDRESS(0)); + l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0)); p = h = real_malloc(asize); if (h) { - alloc_insert_rcu(l, h, size, h); + alloc_insert_rcu(l, h, size, h, generation); p = hdr2ptr(h); } update_stats_rcu_unlock(l); @@ -565,6 +586,7 @@ void *calloc(size_t nmemb, size_t size) struct src_loc *l; struct alloc_hdr *h; size_t asize; + size_t generation = 0; if (__builtin_mul_overflow(size, nmemb, &size)) { errno = ENOMEM; @@ -575,10 +597,10 @@ void *calloc(size_t nmemb, size_t size) return 0; } RETURN_IF_NOT_READY(); - l = update_stats_rcu_lock(size, RETURN_ADDRESS(0)); + l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0)); p = h = real_malloc(asize); if (p) { - alloc_insert_rcu(l, h, size, h); + alloc_insert_rcu(l, h, size, h, generation); p = hdr2ptr(h); memset(p, 0, size); } @@ -593,6 +615,7 @@ void *realloc(void *ptr, size_t size) struct src_loc *l; struct alloc_hdr *h; size_t asize; + size_t generation = 0; if (!size) { free(ptr); @@ -604,10 +627,10 @@ void *realloc(void *ptr, size_t size) } RETURN_IF_NOT_READY(); - l = update_stats_rcu_lock(size, RETURN_ADDRESS(0)); + l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0)); p = h = real_malloc(asize); if (p) { - alloc_insert_rcu(l, h, size, h); + alloc_insert_rcu(l, h, size, h, generation); p = hdr2ptr(h); } update_stats_rcu_unlock(l); @@ -758,17 +781,24 @@ BOOT: PROTOTYPES: ENABLE +size_t +mwrap_current_age() +CODE: + RETVAL = uatomic_read(&total_bytes_inc); +OUTPUT: + RETVAL + size_t mwrap_total_bytes_allocated() CODE: - RETVAL = total_bytes_inc; + RETVAL = uatomic_read(&total_bytes_inc); OUTPUT: RETVAL size_t mwrap_total_bytes_freed() CODE: - RETVAL = total_bytes_dec; + RETVAL = uatomic_read(&total_bytes_dec); OUTPUT: RETVAL @@ -839,6 +869,8 @@ CODE: uatomic_set(&l->total, 0); uatomic_set(&l->allocations, 0); uatomic_set(&l->frees, 0); + uatomic_set(&l->age_total, 0); + uatomic_set(&l->max_lifespan, 0); } rcu_read_unlock(); @@ -932,6 +964,18 @@ OUTPUT: CLEANUP: --locating; +double +src_loc_mean_lifespan(self) + Devel::Mwrap::SrcLoc self +PREINIT: + size_t tot, frees; +CODE: + frees = uatomic_read(&self->frees); + tot = uatomic_read(&self->age_total); + RETVAL = frees ? ((double)tot/(double)frees) : HUGE_VAL; +OUTPUT: + RETVAL + SV * src_loc_name(self) Devel::Mwrap::SrcLoc self @@ -956,6 +1000,7 @@ CODE: ++locating; rcu_read_lock(); cds_list_for_each_entry_rcu(h, &self->allocs, anode) { + size_t gen = uatomic_read(&h->as.live.gen); size_t size = uatomic_read(&h->size); if (size > min) { @@ -970,6 +1015,7 @@ CODE: * since that opens us up to use-after-free */ XPUSHs(sv_2mortal(newSVuv(size))); + XPUSHs(sv_2mortal(newSVuv(gen))); PUTBACK; call_sv(cb, G_DISCARD|G_EVAL); @@ -990,3 +1036,5 @@ CODE: CLEANUP: rcu_read_unlock(); --locating; + + diff --git a/t/mwrap.t b/t/mwrap.t index 661a90a..aba9709 100644 --- a/t/mwrap.t +++ b/t/mwrap.t @@ -38,6 +38,7 @@ SKIP: { # C++ program which uses malloc via "new" mwrap_run('cmake (C++ new)', {}, '-e', 'system(qw(cmake -h)); exit $?'); my $res = slurp($out); is($res, $exp, "`cmake -h' works"); + diag slurp($err); }; { @@ -102,11 +103,24 @@ sub do_read () { for (1..$nr) { do_read() } my $loc = Devel::Mwrap::get('-e:6'); $loc && $loc->total >= ($nbytes * $nr) or die "wrong line or bad stats"; +my $ml = $loc->mean_lifespan; +$ml >= 0.0 or die "mean_lifespan broken"; my @sl_each; $loc->each($nbytes, sub { push @sl_each, \@_ }); my $n = scalar(@sl_each); $n == 1 or die "SrcLoc::each returned unexpected: $n"; $sl_each[0]->[0] >= $nbytes or die "$sl_each[0]->[0] < $nbytes"; + +my $age_before = $sl_each[0]->[1]; +$nbytes = 1024 * 1024 * 8; +do_read() until Devel::Mwrap::current_age() > ($age_before + $nbytes); +@sl_each = (); +$loc->each($nbytes, sub { push @sl_each, \@_ }); +$n = scalar(@sl_each); +$n == 1 or die "SrcLoc::each returned unexpected: $n"; +$sl_each[0]->[0] >= $nbytes or die "$sl_each[0]->[0] < $nbytes"; +my $age_after = $sl_each[0]->[1]; +$age_after >= $age_before or die "age did not increment"; EOF diag slurp($out); diff --git a/typemap b/typemap index 9531289..0b0e4a3 100644 --- a/typemap +++ b/typemap @@ -2,3 +2,4 @@ TYPEMAP size_t T_UV const char * T_PV Devel::Mwrap::SrcLoc T_PTROBJ +double T_DOUBLE