about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <mwrap-perl@yhbt.net> 2020-04-16 05:35:52 +0000
committer Eric Wong <mwrap-perl@yhbt.net> 2020-04-16 19:58:00 +0000
commit 0129ca80d27747693cd2bd98d7cd8a89f2349205 (patch)
tree 4d42eb15b5e947f35935430e9ee5e1033569b80f
parent ab0d084f5fea9ed3195e6088a442b78f47fdbaa6 (diff)
download mwrap-0129ca80d27747693cd2bd98d7cd8a89f2349205.tar.gz
Since Perl5 doesn't have a GC like Ruby's, nor the concept of GC
generations, we'll instead rely on the total bytes allocated in
the process at the time of allocation, so that allocations have
a relative (and high-granularity) age which can be compared
against each other.
-rw-r--r-- Mwrap.xs 74
-rw-r--r-- t/mwrap.t 14
-rw-r--r-- typemap 1
3 files changed, 75 insertions, 14 deletions
diff --git a/Mwrap.xs b/Mwrap.xs
index 6d3c6d2..ca408b9 100644
--- a/Mwrap.xs
+++ b/Mwrap.xs
@@ -26,6 +26,12 @@
 #include <urcu/rculist.h>
 #include "jhash.h"
 
+/*
+ * Perl has no GC like (C) Ruby's, so there is no GC count to use as a
+ * generation.  Instead, the relative age of an allocation is the total
+ * bytes allocated by the process when it was made (we don't care about
+ * overflow on 32-bit since hardly anybody still uses it).
+ */
 static size_t total_bytes_inc, total_bytes_dec;
 
 extern pthread_key_t __attribute__((weak)) PL_thr_key;
@@ -194,6 +200,8 @@ struct src_loc {
         size_t total;
         size_t allocations;
         size_t frees;
+        size_t age_total; /* (age_total / frees) => mean age at free */
+        size_t max_lifespan;
         struct cds_lfht_node hnode;
         struct cds_list_head allocs; /* <=> alloc_hdr.node */
         uint32_t hval;
@@ -215,6 +223,7 @@ struct alloc_hdr {
         struct cds_list_head anode; /* <=> src_loc.allocs */
         union {
                 struct {
+                        size_t gen; /* global age */
                         struct src_loc *loc;
                 } live;
                 struct rcu_head dead;
@@ -281,6 +290,8 @@ again:
                 if (!l) goto out_unlock;
                 memcpy(l, k, sizeof(*l) + n);
                 l->mtx = mutex_assign();
+                l->age_total = 0;
+                l->max_lifespan = 0;
                 l->frees = 0;
                 l->allocations = 1;
                 CDS_INIT_LIST_HEAD(&l->allocs);
@@ -301,17 +312,18 @@ static void update_stats_rcu_unlock(const struct src_loc *l)
         if (caa_likely(l)) rcu_read_unlock();
 }
 
-static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller)
+static struct src_loc *
+update_stats_rcu_lock(size_t *generation, size_t size, uintptr_t caller)
 {
-        static const size_t xlen = sizeof(caller);
         struct src_loc *k, *ret = 0;
+        static const size_t xlen = sizeof(caller);
         char *dst;
         const COP *cop;
 
         if (caa_unlikely(!totals)) return 0;
         if (locating++) goto out; /* do not recurse into another *alloc */
 
-        uatomic_add(&total_bytes_inc, size);
+        *generation = uatomic_add_return(&total_bytes_inc, size);
         cop = PL_curcop;
 
         rcu_read_lock();
@@ -379,12 +391,17 @@ void free(void *p)
 
                 if (!real_free) return; /* oh well, leak a little */
                 if (l) {
+                        size_t current_bytes = uatomic_read(&total_bytes_inc);
+                        size_t age = current_bytes - h->as.live.gen;
                         uatomic_add(&total_bytes_dec, h->size);
                         uatomic_set(&h->size, 0);
                         uatomic_add(&l->frees, 1);
+                        uatomic_add(&l->age_total, age);
 
                         mutex_lock(l->mtx);
                         cds_list_del_rcu(&h->anode);
+                        if (age > l->max_lifespan)
+                                l->max_lifespan = age;
                         mutex_unlock(l->mtx);
 
                         call_rcu(&h->as.dead, free_hdr_rcu);
@@ -395,13 +412,15 @@ void free(void *p)
 }
 
 static void
-alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
+alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size,
+                void *real, size_t generation)
 {
         /* we need src_loc to remain alive for the duration of this call */
         if (!h) return;
         h->size = size;
         h->real = real;
         h->as.live.loc = l;
+        h->as.live.gen = generation;
         if (l) {
                 mutex_lock(l->mtx);
                 cds_list_add_rcu(&h->anode, &l->allocs);
@@ -433,6 +452,7 @@ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
         struct alloc_hdr *h;
         void *real;
         size_t asize;
+        size_t generation = 0;
         size_t d = alignment / sizeof(void*);
         size_t r = alignment % sizeof(void*);
 
@@ -453,7 +473,7 @@ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
             __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
                 return ENOMEM;
 
-        l = update_stats_rcu_lock(size, caller);
+        l = update_stats_rcu_lock(&generation, size, caller);
 
         real = real_malloc(asize);
         if (real) {
@@ -461,7 +481,7 @@ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
                 if (!ptr_is_aligned(p, alignment))
                         p = ptr_align(p, alignment);
                 h = ptr2hdr(p);
-                alloc_insert_rcu(l, h, size, real);
+                alloc_insert_rcu(l, h, size, real, generation);
                 *pp = p;
         }
         update_stats_rcu_unlock(l);
@@ -530,6 +550,7 @@ void *malloc(size_t size)
         struct alloc_hdr *h;
         size_t asize;
         void *p;
+        size_t generation = 0;
 
         if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
                 goto enomem;
@@ -545,10 +566,10 @@ void *malloc(size_t size)
                 real_malloc = dlsym(RTLD_NEXT, "malloc");
         }
 #endif
-        l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+        l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0));
         p = h = real_malloc(asize);
         if (h) {
-                alloc_insert_rcu(l, h, size, h);
+                alloc_insert_rcu(l, h, size, h, generation);
                 p = hdr2ptr(h);
         }
         update_stats_rcu_unlock(l);
@@ -565,6 +586,7 @@ void *calloc(size_t nmemb, size_t size)
         struct src_loc *l;
         struct alloc_hdr *h;
         size_t asize;
+        size_t generation = 0;
 
         if (__builtin_mul_overflow(size, nmemb, &size)) {
                 errno = ENOMEM;
@@ -575,10 +597,10 @@ void *calloc(size_t nmemb, size_t size)
                 return 0;
         }
         RETURN_IF_NOT_READY();
-        l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+        l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0));
         p = h = real_malloc(asize);
         if (p) {
-                alloc_insert_rcu(l, h, size, h);
+                alloc_insert_rcu(l, h, size, h, generation);
                 p = hdr2ptr(h);
                 memset(p, 0, size);
         }
@@ -593,6 +615,7 @@ void *realloc(void *ptr, size_t size)
         struct src_loc *l;
         struct alloc_hdr *h;
         size_t asize;
+        size_t generation = 0;
 
         if (!size) {
                 free(ptr);
@@ -604,10 +627,10 @@ void *realloc(void *ptr, size_t size)
         }
         RETURN_IF_NOT_READY();
 
-        l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+        l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0));
         p = h = real_malloc(asize);
         if (p) {
-                alloc_insert_rcu(l, h, size, h);
+                alloc_insert_rcu(l, h, size, h, generation);
                 p = hdr2ptr(h);
         }
         update_stats_rcu_unlock(l);
@@ -759,16 +782,23 @@ BOOT:
 PROTOTYPES: ENABLE
 
 size_t
+mwrap_current_age()
+CODE:
+        RETVAL = uatomic_read(&total_bytes_inc); /* same counter that stamps each allocation's gen */
+OUTPUT:
+        RETVAL
+
+size_t
 mwrap_total_bytes_allocated()
 CODE:
-        RETVAL = total_bytes_inc;
+        RETVAL = uatomic_read(&total_bytes_inc); /* grows by the requested size of each tracked allocation */
 OUTPUT:
         RETVAL
 
 size_t
 mwrap_total_bytes_freed()
 CODE:
-        RETVAL = total_bytes_dec;
+        RETVAL = uatomic_read(&total_bytes_dec); /* grows by h->size on each tracked free() */
 OUTPUT:
         RETVAL
 
@@ -839,6 +869,8 @@ CODE:
                 uatomic_set(&l->total, 0);
                 uatomic_set(&l->allocations, 0);
                 uatomic_set(&l->frees, 0);
+                uatomic_set(&l->age_total, 0);
+                uatomic_set(&l->max_lifespan, 0);
         }
         rcu_read_unlock();
 
@@ -932,6 +964,18 @@ OUTPUT:
 CLEANUP:
         --locating;
 
+double
+src_loc_mean_lifespan(self)
+        Devel::Mwrap::SrcLoc self
+PREINIT:
+        size_t tot, frees;
+CODE:
+        frees = uatomic_read(&self->frees); /* read separately from age_total: not one atomic snapshot */
+        tot = uatomic_read(&self->age_total); /* sum of ages (in allocated bytes) recorded at free() */
+        RETVAL = frees ? ((double)tot/(double)frees) : HUGE_VAL; /* nothing freed yet => HUGE_VAL */
+OUTPUT:
+        RETVAL
+
 SV *
 src_loc_name(self)
         Devel::Mwrap::SrcLoc self
@@ -956,6 +1000,7 @@ CODE:
         ++locating;
         rcu_read_lock();
         cds_list_for_each_entry_rcu(h, &self->allocs, anode) {
+                size_t gen = uatomic_read(&h->as.live.gen);
                 size_t size = uatomic_read(&h->size);
 
                 if (size > min) {
@@ -970,6 +1015,7 @@ CODE:
                          * since that opens us up to use-after-free
                          */
                         XPUSHs(sv_2mortal(newSVuv(size)));
+                        XPUSHs(sv_2mortal(newSVuv(gen)));
                         PUTBACK;
 
                         call_sv(cb, G_DISCARD|G_EVAL);
diff --git a/t/mwrap.t b/t/mwrap.t
index 661a90a..aba9709 100644
--- a/t/mwrap.t
+++ b/t/mwrap.t
@@ -38,6 +38,7 @@ SKIP: { # C++ program which uses malloc via "new"
         mwrap_run('cmake (C++ new)', {}, '-e', 'system(qw(cmake -h)); exit $?');
         my $res = slurp($out);
         is($res, $exp, "`cmake -h' works");
+        diag slurp($err);
 };
 
 {
@@ -102,11 +103,24 @@ sub do_read () {
 for (1..$nr) { do_read() }
 my $loc = Devel::Mwrap::get('-e:6');
 $loc && $loc->total >= ($nbytes * $nr) or die "wrong line or bad stats";
+my $ml = $loc->mean_lifespan;
+$ml >= 0.0 or die "mean_lifespan broken";
 my @sl_each;
 $loc->each($nbytes, sub { push @sl_each, \@_ });
 my $n = scalar(@sl_each);
 $n == 1 or die "SrcLoc::each returned unexpected: $n";
 $sl_each[0]->[0] >= $nbytes or die "$sl_each[0]->[0] < $nbytes";
+
+my $age_before = $sl_each[0]->[1];
+$nbytes = 1024 * 1024 * 8;
+do_read() until Devel::Mwrap::current_age() > ($age_before + $nbytes);
+@sl_each = ();
+$loc->each($nbytes, sub { push @sl_each, \@_ });
+$n = scalar(@sl_each);
+$n == 1 or die "SrcLoc::each returned unexpected: $n";
+$sl_each[0]->[0] >= $nbytes or die "$sl_each[0]->[0] < $nbytes";
+my $age_after = $sl_each[0]->[1];
+$age_after >= $age_before or die "age did not increment";
 EOF
 diag slurp($out);
 
diff --git a/typemap b/typemap
index 9531289..0b0e4a3 100644
--- a/typemap
+++ b/typemap
@@ -2,3 +2,4 @@ TYPEMAP
 size_t        T_UV
 const char *        T_PV
 Devel::Mwrap::SrcLoc        T_PTROBJ
+double        T_DOUBLE