about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2022-12-13 12:18:31 +0000
committer Eric Wong <e@80x24.org> 2022-12-15 20:34:06 +0000
commit 73249f748a7d11560850b87dc764824e31f149a3 (patch)
tree 61c1ab3ba875cdbf9c1a9b1c4ba1ac46669c5055
parent b77b624f509acc3f09b27c174d23d8aa2b2d34da (diff)
download mwrap-73249f748a7d11560850b87dc764824e31f149a3.tar.gz
It can be enlightening to see the machine backtrace (not the
Perl one) leading up to particular malloc calls from a Perl
function.

"/$PID/at/$LOCATION" URLs now encode the location as URL-safe
Base64 to simplify parsing and reduce URL length for multi-level
backtraces.

Warning: increasing $NUM (the depth given via "bt:$NUM") leads to
large increases in the memory usage of mwrap itself.  This is
because common malloc wrapper functions (e.g. "Perl_safesysmalloc",
"xmalloc") take up the first stack level while a diverse array of
callers occupies the higher levels, so each distinct call chain
is tracked as a separate location.
-rw-r--r-- Mwrap.xs 23
-rw-r--r-- mwrap_core.h 263
-rw-r--r-- mwrap_httpd.h 166
-rw-r--r-- t/mwrap-httpd.t 17
4 files changed, 274 insertions, 195 deletions
diff --git a/Mwrap.xs b/Mwrap.xs
index ceeb495..6adf975 100644
--- a/Mwrap.xs
+++ b/Mwrap.xs
@@ -12,24 +12,29 @@
  */
 typedef struct src_loc * Devel__Mwrap__SrcLoc;
 
+/* keep this consistent with mwrap_httpd.h write_loc_name */
 static SV *location_string(struct src_loc *l)
 {
-        SV *ret;
+        SV *ret = newSV(0);
 
         if (l->f) {
-                ret = newSV(0);
+                sv_catpv(ret, l->f->fn);
                 if (l->lineno == UINT_MAX)
-                        sv_setpvf(ret, "%s:-", l->f->fn);
+                        sv_catpvs(ret, ":-");
                 else
-                        sv_setpvf(ret, "%s:%zu", l->f->fn, l->lineno);
-        } else {
+                        sv_catpvf(ret, ":%zu", l->lineno);
+        }
+        if (l->bt_len) {
                 char **s = bt_syms(l->bt, l->bt_len);
-
-                if (!s) return &PL_sv_undef;
-                ret = newSVpvn(s[0], strlen(s[0]));
+                if (s) {
+                        if (l->f)
+                                sv_catpvs(ret, "\n");
+                        sv_catpv(ret, s[0]);
+                        for (uint32_t i = 1; i < l->bt_len; ++i)
+                                sv_catpvf(ret, "\n%s", s[i]);
+                }
                 free(s);
         }
-
         return ret;
 }
 
diff --git a/mwrap_core.h b/mwrap_core.h
index 179f136..ec08ee1 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -67,6 +67,7 @@ typedef void COP;
  * hardly anybody still uses it).
  */
 static size_t total_bytes_inc, total_bytes_dec;
+static uint32_t bt_req_depth;
 
 #if MWRAP_PERL
 extern pthread_key_t __attribute__((weak)) PL_thr_key;
@@ -131,7 +132,22 @@ static void *my_mempcpy(void *dest, const void *src, size_t n)
 
 /* stolen from glibc: */
 #define RETURN_ADDRESS(nr) \
-  (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
+  __builtin_extract_return_addr(__builtin_return_address(nr))
+
+#define SRC_LOC_BT(bt) union stk_bt bt; do { \
+        uint32_t depth = locating ? 1 : bt_req_depth; \
+        switch (depth) { \
+        case 0: \
+        case 1: bt.sl.bt_len = 1; bt.sl.bt[0] = RETURN_ADDRESS(0); break; \
+        default: /* skip 1st level of BT since thats our function */ \
+                mwrap_assert(bt_req_depth <= MWRAP_BT_MAX); \
+                ++locating; \
+                long n = (long)backtrace(&bt.sl.bt[-1], bt_req_depth); \
+                --locating; \
+                bt.sl.bt_len = n <= 1 ? 0 : (uint32_t)n - 1; \
+                if (n > 1) mwrap_assert(bt.sl.bt[0] == RETURN_ADDRESS(0)); \
+        } \
+} while (0)
 
 /*
  * only for interpreted sources (Perl/Ruby/etc), not backtrace_symbols* files
@@ -163,6 +179,11 @@ struct src_loc {
         void *bt[];
 };
 
+#ifdef static_assert
+static_assert(sizeof(struct src_file *) == sizeof(size_t),
+                "size_t is the same size as a pointer");
+#endif
+
 /*
  * Every allocation has this in the header, maintain alignment with malloc
  * Do not expose this to Perl code because of use-after-free concerns.
@@ -180,12 +201,16 @@ struct alloc_hdr {
         size_t size;
 };
 
-static MWRAP_TSD union {
-        char kbuf[sizeof(struct src_file) + PATH_MAX];
-        char btbuf[sizeof(struct src_loc) + sizeof(uintptr_t) * MWRAP_BT_MAX];
-        struct src_file src_file;
-        struct src_loc src_loc;
-} tsd;
+/* on-stack structures */
+union stk_sf {
+        struct src_file sf;
+        char buf_[sizeof(struct src_file) + PATH_MAX];
+};
+
+union stk_bt {
+        struct src_loc sl;
+        char buf_[sizeof(struct src_loc) + sizeof(void *) * MWRAP_BT_MAX];
+};
 
 static struct alloc_hdr *ptr2hdr(void *p)
 {
@@ -207,6 +232,11 @@ static size_t bt_bytelen(const struct src_loc *l)
         return sizeof(l->bt[0]) * l->bt_len;
 }
 
+static size_t src_loc_hash_len(const struct src_loc *l)
+{
+        return sizeof(l->f) + sizeof(l->lineno) + + bt_bytelen(l);
+}
+
 static int loc_eq(struct cds_lfht_node *node, const void *key)
 {
         const struct src_loc *existing;
@@ -214,10 +244,8 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
 
         existing = caa_container_of(node, struct src_loc, hnode);
 
-        return (k->f == existing->f &&
-                k->lineno == existing->lineno &&
-                k->bt_len == existing->bt_len &&
-                !memcmp(k->bt, existing->bt, bt_bytelen(k)));
+        return (k->bt_len == existing->bt_len &&
+                !memcmp(&k->f, &existing->f, src_loc_hash_len(k)));
 }
 
 static int fn_eq(struct cds_lfht_node *node, const void *key)
@@ -314,8 +342,7 @@ static uint32_t do_hash(const void *p, size_t len)
 
 static void hash_src_loc(struct src_loc *l)
 {
-        l->loc_hash = do_hash(&l->f, sizeof(l->f) + sizeof(l->lineno) +
-                                + bt_bytelen(l));
+        l->loc_hash = do_hash(&l->f, src_loc_hash_len(l));
 }
 
 static struct src_file *src_file_get(struct cds_lfht *t, struct src_file *k,
@@ -338,29 +365,34 @@ static struct src_file *src_file_get(struct cds_lfht *t, struct src_file *k,
         return cur ? caa_container_of(cur, struct src_file, nd) : NULL;
 }
 
-static struct src_loc *assign_line(size_t size, const char *fn, unsigned lineno)
+#if !MWRAP_PERL
+#        define CopFILE(cop) NULL
+#        define CopLINE(cop) 0
+#endif
+static struct src_loc *assign_line(size_t size, const COP *cop,
+                                struct src_loc *sl)
 {
         /* avoid vsnprintf or anything which could call malloc here: */
-        size_t len;
+        if (!cop) return NULL;
+        const char *fn = CopFILE(cop);
+        if (!fn) return NULL;
+        unsigned lineno = CopLINE(cop);
         struct src_file *f;
-        struct src_file *k = &tsd.src_file;
-        struct src_loc *l;
+        union stk_sf sf;
         struct cds_lfht_node *cur;
         struct cds_lfht *t = CMM_LOAD_SHARED(files);
 
         mwrap_assert(t);
 
-        if (!fn)
-                return NULL;
-        len = strlen(fn);
+        size_t len = strlen(fn);
         if (len >= PATH_MAX)
                 len = PATH_MAX - 1;
 again:
-        f = src_file_get(t, k, fn, len);
+        f = src_file_get(t, &sf.sf, fn, len);
         if (!f) { /* doesn't exist, add a new one */
                 f = real_malloc(sizeof(*f) + len + 1);
                 if (!f) return NULL;
-                memcpy(f, k, sizeof(*f) + len + 1);
+                memcpy(f, &sf.sf, sizeof(*f) + len + 1);
                 cur = cds_lfht_add_unique(t, f->fn_hash, fn_eq, f, &f->nd);
                 if (cur != &f->nd) { /* lost race */
                         rcu_read_unlock();
@@ -369,41 +401,34 @@ again:
                         goto again;
                 }
         }
-        l = &tsd.src_loc;
-        l->total = size;
-        l->f = f;
-        l->lineno = lineno;
-        l->bt_len = 0;
-        hash_src_loc(l);
-        return totals_add_rcu(l);
+        sl->total = size;
+        sl->f = f;
+        sl->lineno = lineno;
+        if (f && !bt_req_depth)
+                sl->bt_len = 0;
+        hash_src_loc(sl);
+        return totals_add_rcu(sl);
 }
 
 static struct src_loc *
-update_stats_rcu_lock(size_t *generation, size_t size, uintptr_t caller)
+update_stats_rcu_lock(size_t *generation, size_t size, struct src_loc *sl)
 {
-        struct src_loc *k, *ret = 0;
         struct cds_lfht *t = CMM_LOAD_SHARED(totals);
-        const COP *cop = NULL;
+        struct src_loc *ret = NULL;
 
         if (caa_unlikely(!t)) return 0; /* not initialized */
         if (locating++) goto out; /* do not recurse into another *alloc */
 
         *generation = uatomic_add_return(&total_bytes_inc, size);
-        cop = mwp_curcop();
+        const COP *cop = mwp_curcop();
         rcu_read_lock();
-#if MWRAP_PERL
-        if (cop)
-                ret = assign_line(size, OutCopFILE(cop), CopLINE(cop));
-#endif /* MWRAP_PERL */
+        ret = assign_line(size, cop, sl);
         if (!ret) { /* no associated Perl code, just C/C++ */
-                k = &tsd.src_loc;
-                k->total = size;
-                k->f = NULL;
-                k->lineno = 0;
-                k->bt[0] = (void *)caller;
-                k->bt_len = 1;
-                hash_src_loc(k);
-                ret = totals_add_rcu(k);
+                sl->total = size;
+                sl->f = NULL;
+                sl->lineno = 0;
+                hash_src_loc(sl);
+                ret = totals_add_rcu(sl);
         }
 out:
         --locating;
@@ -482,7 +507,7 @@ static bool is_power_of_two(size_t n)
 }
 
 static int
-mwrap_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
+mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl)
 {
         struct src_loc *l;
         struct alloc_hdr *h;
@@ -507,7 +532,7 @@ mwrap_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
             __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
                 return ENOMEM;
 
-        l = update_stats_rcu_lock(&generation, size, caller);
+        l = update_stats_rcu_lock(&generation, size, sl);
 
         real = real_malloc(asize);
         if (real) {
@@ -533,13 +558,15 @@ static void *memalign_result(int err, void *p)
 void *memalign(size_t alignment, size_t size)
 {
         void *p = NULL;
-        int err = mwrap_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+        SRC_LOC_BT(bt);
+        int err = mwrap_memalign(&p, alignment, size, &bt.sl);
         return memalign_result(err, p);
 }
 
 int posix_memalign(void **p, size_t alignment, size_t size)
 {
-        return mwrap_memalign(p, alignment, size, RETURN_ADDRESS(0));
+        SRC_LOC_BT(bt);
+        return mwrap_memalign(p, alignment, size, &bt.sl);
 }
 
 /* these aliases aren't needed for glibc, not sure about other libcs... */
@@ -549,12 +576,10 @@ void cfree(void *) __attribute__((__nothrow__))
 
 void *valloc(size_t size)
 {
-        void *p = NULL;
-        int err;
-
         ensure_initialization();
-        err = mwrap_memalign(&p, mparams.page_size,
-                                size, RETURN_ADDRESS(0));
+        SRC_LOC_BT(bt);
+        void *p = NULL;
+        int err = mwrap_memalign(&p, mparams.page_size, size, &bt.sl);
         return memalign_result(err, p);
 }
 
@@ -576,7 +601,6 @@ static size_t size_align(size_t size, size_t alignment)
 void *pvalloc(size_t size)
 {
         void *p = NULL;
-        int err;
 
         ensure_initialization();
 
@@ -585,24 +609,23 @@ void *pvalloc(size_t size)
                 return 0;
         }
         size = size_align(size, mparams.page_size);
-        err = mwrap_memalign(&p, mparams.page_size,
-                                size, RETURN_ADDRESS(0));
+        SRC_LOC_BT(bt);
+        int err = mwrap_memalign(&p, mparams.page_size, size, &bt.sl);
         return memalign_result(err, p);
 }
 
 void *malloc(size_t size)
 {
-        struct src_loc *l;
-        struct alloc_hdr *h;
         size_t asize;
-        void *p;
-        size_t generation = 0;
 
         if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
                 goto enomem;
 
-        l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0));
-        p = h = real_malloc(asize);
+        size_t generation = 0;
+        SRC_LOC_BT(bt);
+        struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
+        struct alloc_hdr *h;
+        void *p = h = real_malloc(asize);
         if (h) {
                 alloc_insert_rcu(l, h, size, h, generation);
                 p = hdr2ptr(h);
@@ -617,9 +640,6 @@ enomem:
 
 void *calloc(size_t nmemb, size_t size)
 {
-        void *p;
-        struct src_loc *l;
-        struct alloc_hdr *h;
         size_t asize;
         size_t generation = 0;
 
@@ -631,8 +651,10 @@ void *calloc(size_t nmemb, size_t size)
                 errno = ENOMEM;
                 return 0;
         }
-        l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0));
-        p = h = real_malloc(asize);
+        struct alloc_hdr *h;
+        SRC_LOC_BT(bt);
+        struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
+        void *p = h = real_malloc(asize);
         if (p) {
                 alloc_insert_rcu(l, h, size, h, generation);
                 p = hdr2ptr(h);
@@ -645,11 +667,7 @@ void *calloc(size_t nmemb, size_t size)
 
 void *realloc(void *ptr, size_t size)
 {
-        void *p;
-        struct src_loc *l;
-        struct alloc_hdr *h;
         size_t asize;
-        size_t generation = 0;
 
         if (!size) {
                 free(ptr);
@@ -659,8 +677,11 @@ void *realloc(void *ptr, size_t size)
                 errno = ENOMEM;
                 return 0;
         }
-        l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0));
-        p = h = real_malloc(asize);
+        struct alloc_hdr *h;
+        size_t generation = 0;
+        SRC_LOC_BT(bt);
+        struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
+        void *p = h = real_malloc(asize);
         if (p) {
                 alloc_insert_rcu(l, h, size, h, generation);
                 p = hdr2ptr(h);
@@ -752,15 +773,14 @@ static struct src_loc *src_loc_lookup(const char *str, size_t len)
         char *c = memrchr(str, ':', len);
         const char *end = str + len;
         unsigned lineno;
-        size_t fn_len;
-        struct src_file *f;
         struct src_loc *l = NULL;
         struct cds_lfht *t = CMM_LOAD_SHARED(files);
+        union stk_sf sf;
 
         if (!c || c == end || !t)
                 return NULL;
 
-        fn_len = c - str;
+        size_t fn_len = c - str;
         c++;
         if (*c == '-') {
                 lineno = UINT_MAX;
@@ -774,16 +794,16 @@ static struct src_loc *src_loc_lookup(const char *str, size_t len)
                 }
         }
         rcu_read_lock();
-        f = src_file_get(t, &tsd.src_file, str, fn_len);
+        struct src_file *f = src_file_get(t, &sf.sf, str, fn_len);
         t = CMM_LOAD_SHARED(totals);
         if (f && t) {
-                struct src_loc *k = &tsd.src_loc;
+                struct src_loc k;
 
-                k->f = f;
-                k->lineno = lineno;
-                k->bt_len = 0;
-                hash_src_loc(k);
-                l = src_loc_get(t, k);
+                k.f = f;
+                k.lineno = lineno;
+                k.bt_len = 0;
+                hash_src_loc(&k);
+                l = src_loc_get(t, &k);
         }
         rcu_read_unlock();
         return l;
@@ -815,13 +835,12 @@ static void dump_destructor(void)
                         (dump_path += sizeof("dump_path")) &&
                         *dump_path) {
                 char *end = strchr(dump_path, ',');
+                char buf[PATH_MAX];
                 if (end) {
-                        char *tmp = tsd.kbuf;
-                        mwrap_assert((end - dump_path) <
-                                        (intptr_t)sizeof(tsd.kbuf));
-                        end = mempcpy(tmp, dump_path, end - dump_path);
+                        mwrap_assert((end - dump_path) < (intptr_t)sizeof(buf));
+                        end = mempcpy(buf, dump_path, end - dump_path);
                         *end = 0;
-                        dump_path = tmp;
+                        dump_path = buf;
                 }
                 dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
                                 0666);
@@ -884,31 +903,48 @@ static void mwrap_reset(void)
 
 static struct src_loc *mwrap_get(const char *str, size_t len)
 {
-        struct src_loc *l = NULL;
         void *p;
 
-        if (len >= PATH_MAX)
-                return l;
-        if (extract_addr(str, len, &p)) {
+        if (!extract_addr(str, len, &p))
+                return src_loc_lookup(str, len);
+
+        union stk_bt k;
+        struct cds_lfht *t = CMM_LOAD_SHARED(totals);
+
+        if (!t) return NULL;
+        k.sl.f = NULL;
+        k.sl.lineno = 0;
+        k.sl.bt[0] = p;
+        k.sl.bt_len = 1;
+        hash_src_loc(&k.sl);
+        rcu_read_lock();
+        struct src_loc *l = src_loc_get(t, &k.sl);
+        rcu_read_unlock();
+        return l;
+}
+
+static struct src_loc *mwrap_get_bin(const char *buf, size_t len)
+{
+        if ((len % sizeof(void *)) == 0 && len >= (2 * sizeof(void *))) {
+                union stk_bt k;
                 struct cds_lfht *t = CMM_LOAD_SHARED(totals);
-                struct src_loc *k;
+                if (!t) return NULL;
 
-                if (!t)
-                        return l;
-                k = &tsd.src_loc;
+                k.sl.bt_len = len / sizeof(void *);
+                k.sl.bt_len -= 2; /* lineno + src_file *f */
 
-                k->f = NULL;
-                k->lineno = 0;
-                k->bt[0] = p;
-                k->bt_len = 1;
-                hash_src_loc(k);
+                if (k.sl.bt_len > MWRAP_BT_MAX)
+                        return NULL;
+
+                memcpy(&k.sl.f, buf, len);
+
+                hash_src_loc(&k.sl);
                 rcu_read_lock();
-                l = src_loc_get(t, k);
+                struct src_loc *l = src_loc_get(t, &k.sl);
                 rcu_read_unlock();
-        } else {
-                l = src_loc_lookup(str, len);
+                return l;
         }
-        return l;
+        return NULL;
 }
 
 static const char *mwrap_env;
@@ -952,5 +988,20 @@ __attribute__((constructor)) static void mwrap_ctor(void)
         CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL));
         CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent,
                                      atfork_child));
+
+        if (mwrap_env) {
+                const char *bt = strstr(mwrap_env, "bt:");
+                if (bt) {
+                        bt += sizeof("bt");
+                        errno = 0;
+                        char *end;
+                        unsigned long n = strtoul(bt, &end, 10);
+                        if (n && !errno && (*end == ',' || *end == 0)) {
+                                if (n >= MWRAP_BT_MAX)
+                                        n = MWRAP_BT_MAX;
+                                bt_req_depth = (uint32_t)n;
+                        }
+                }
+        }
         --locating;
 }
diff --git a/mwrap_httpd.h b/mwrap_httpd.h
index bfa723d..bd37fb2 100644
--- a/mwrap_httpd.h
+++ b/mwrap_httpd.h
@@ -102,6 +102,7 @@ struct h1_src_loc {
         size_t live;
         size_t max_life;
         off_t lname_len;
+        const struct src_loc *sl;
         char *loc_name;
 };
 
@@ -333,76 +334,76 @@ static void write_html(FILE *fp, const char *s, size_t len)
         }
 }
 
-static void write_uri(FILE *fp, const char *s, size_t len)
+/* URI-safe base-64 (RFC 4648) */
+static void write_b64_url(FILE *fp, const uint8_t *in, size_t len)
 {
-#define CGI_URI_OK(x) \
-        ((x >= 'a' && x <= 'z') || \
-         (x >= 'A' && x <= 'Z') || \
-         (x >= '0' && x <= '9') || \
-         (x == '.' || x == '-' || x == '_' || x == '~'))
-        for (; len--; ++s) {
-                if (caa_likely(CGI_URI_OK(*s))) {
-                        fputc(*s, fp);
-                } else {
-                        static const char cgi_digitmap[] = "0123456789ABCDEF";
-                        unsigned char c = *s;
-                        char x[3];
-
-                        x[2] = cgi_digitmap[(c % 16)];
-                        x[1] = cgi_digitmap[((c / 16) % 16)];
-                        x[0] = '%';
-                        fwrite(x, sizeof(x), 1, fp);
-                }
+        static const uint8_t b64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                        "abcdefghijklmnopqrstuvwxyz" "0123456789-_";
+        uint8_t o[4];
+        while (len > 3) {
+                o[0] = b64[(in[0] >> 2) & 0x3f];
+                o[1] = b64[((in[0] << 4) | (in[1] >> 4)) & 0x3f];
+                o[2] = b64[((in[1] << 2) | (in[2] >> 6)) & 0x3f];
+                o[3] = b64[in[2] & 0x3f];
+                fwrite(o, sizeof(o), 1, fp);
+                len -= 3;
+                in += 3;
+        }
+        if (len) {
+                size_t i = 2;
+
+                o[0] = b64[(in[0] >> 2) & 0x3f];
+                o[1] = b64[((in[0] << 4) | (--len ? (in[1] >> 4) : 0)) & 0x3f];
+                if (len)
+                        o[i++] = b64[((in[1] << 2) |
+                                        (--len ? in[2] >> 6 : 0)) & 0x3f];
+                if (len)
+                        o[i++] = b64[in[2] & 0x3f];
+                fwrite(o, i, 1, fp);
         }
-#undef CGI_URI_OK
-}
-
-static bool is_hex(int x)
-{
-        return (((x) >= '0' && (x) <= '9') ||
-               ((x) >= 'a' && (x) <= 'f') ||
-               ((x) >= 'A' && (x) <= 'F'));
-}
-
-static int xtoupper(int x)
-{
-        return (x >= 'a' && x <= 'f') ? (x & ~0x20) : x;
-}
-
-static int hexchar_to_int(int x)
-{
-        return (x < 'A') ? (x - '0') : (xtoupper(x) - 'A' + 10);
-}
-
-static int hexpair_to_int(int x1, int x2)
-{
-        return ((hexchar_to_int(x1) << 4) | hexchar_to_int(x2));
 }
 
 /* unescapes @s in-place and adjusts @len */
-static const char *uri_unescape(const char *s, size_t *len)
+static bool b64_url_decode(const void *ptr, size_t *len)
 {
-        union { const char *in; char *out; } deconst;
-        size_t orig = *len;
-        char *out;
-
-        deconst.in = s;
-        out = deconst.out;
-        for (; orig--; s++) {
-                if (caa_unlikely(*s == '%') && orig > 1 &&
-                                caa_likely(is_hex(s[1])) &&
-                                caa_likely(is_hex(s[2]))) {
-                        orig -= 2;
-                        *len -= 2;
-                        *out++ = hexpair_to_int(s[1], s[2]);
-                        s += 2;
-                } else {
-                        *out++ = *s;
+        union { const void *in; uint8_t *out; } deconst;
+        const uint8_t *in = ptr;
+        uint8_t u = 0;
+
+        deconst.in = ptr;
+        uint8_t *out = deconst.out;
+
+        for (size_t i = 0; i < *len; ++i) {
+                uint8_t c = in[i];
+
+                switch (c) {
+                case 'A' ... 'Z': c -= 'A'; break;
+                case 'a' ... 'z': c -= ('a' - 26); break;
+                case '0' ... '9': c -= ('0' - 52); break;
+                case '-': c = 62; break;
+                case '_': c = 63; break;
+                default: return false;
+                }
+
+                mwrap_assert(c <= 63);
+                switch (i % 4) {
+                case 0: u = c << 2; break;
+                case 1:
+                        *out++ = u | c >> 4;
+                        u = (c & 0xf) << 4;
+                        break;
+                case 2:
+                        *out++ = u | c >> 2;
+                        u = (c & 0x3) << 6;
+                        break;
+                case 3: *out++ = u | c;
                 }
         }
-        return deconst.in;
+        *len = out - in;
+        return true;
 }
 
+/* keep this consistent with Mwrap.xs location_string */
 static off_t write_loc_name(FILE *fp, const struct src_loc *l)
 {
         off_t beg = ftello(fp);
@@ -417,12 +418,21 @@ static off_t write_loc_name(FILE *fp, const struct src_loc *l)
                         FPUTS(":-", fp);
                 else
                         fprintf(fp, ":%zu", l->lineno);
-        } else {
+        }
+        if (l->bt_len) {
                 char **s = bt_syms(l->bt, l->bt_len);
                 if (!s) return -1;
+                if (l->f) fputc('\n', fp);
+
+                /* omit local " [$ADDRESS]" if doing deep backtraces */
+                for (uint32_t i = 0; i < l->bt_len; ++i) {
+                        char *c = memrchr(s[i], '[', strlen(s[i]));
+                        if (c && c > (s[i] + 2) && c[-1] == ' ')
+                                c[-1] = '\0';
+                }
 
                 fputs(s[0], fp);
-                for (uint32_t i = 1; i < l->bt_len; i++) {
+                for (uint32_t i = 1; i < l->bt_len; ++i) {
                         fputc('\n', fp);
                         fputs(s[i], fp);
                 }
@@ -445,7 +455,6 @@ static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc, FILE *lp)
         struct cds_lfht *t = CMM_LOAD_SHARED(totals);
         struct cds_lfht_iter iter;
         struct src_loc *l;
-        ++locating;
         if (t) cds_lfht_for_each_entry(t, &iter, l, hnode) {
                 size_t freed = uatomic_read(&l->freed_bytes);
                 size_t total = uatomic_read(&l->total);
@@ -461,11 +470,10 @@ static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc, FILE *lp)
                                 (long double)hsl.frees) :
                         HUGE_VAL;
                 hsl.max_life = uatomic_read(&l->max_lifespan);
+                hsl.sl = l;
                 hsl.lname_len = write_loc_name(lp, l);
                 fwrite(&hsl, sizeof(hsl), 1, fp);
         }
-        --locating;
-        mwrap_assert(!locating);
         rcu_read_unlock();
 
         struct h1_src_loc *hslv;
@@ -486,9 +494,11 @@ static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r)
         size_t len = h1r->path_len - (sizeof("/at/") - 1);
         size_t min = 0;
 
-        loc = uri_unescape(loc, &len);
-        if (len >= PATH_MAX) return h1_400(h1);
-        struct src_loc *l = mwrap_get(loc, len);
+        if (!b64_url_decode(loc, &len) || len >= PATH_MAX)
+                return h1_400(h1);
+
+        struct src_loc *l = mwrap_get_bin(loc, len);
+
         if (!l) return h1_404(h1);
 
         struct mw_membuf lname;
@@ -503,7 +513,9 @@ static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r)
         if (!fp) return h1_close(h1);
         FPUTS("<html><head><title>", fp);
         write_html(fp, lname.ptr, lname.len);
-        FPUTS("</title></head><body><p>live allocations at ", fp);
+        FPUTS("</title></head><body><p>live allocations at", fp);
+        if (bt_req_depth) FPUTS("<br/>", fp);
+        else fputc('\n', fp);
         write_html(fp, lname.ptr, lname.len);
         free(lname.ptr);
 
@@ -514,7 +526,6 @@ static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r)
                 "<th>address</th></tr>", fp);
 
         rcu_read_lock();
-        ++locating;
         struct alloc_hdr *h;
         cds_list_for_each_entry_rcu(h, &l->allocs, anode) {
                 size_t size = uatomic_read(&h->size);
@@ -522,8 +533,6 @@ static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r)
                         fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%p</td>\n",
                                 size, h->as.live.gen, h->real);
         }
-        --locating;
-        mwrap_assert(!locating);
         rcu_read_unlock();
         FPUTS("</table></body></html>", fp);
         return h1_200(h1, fp, &mb);
@@ -574,7 +583,11 @@ static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
         size_t age = uatomic_read(&total_bytes_inc);
         fprintf(fp, "<p>Current age: %zu (live: %zu)",
                 age, age - uatomic_read(&total_bytes_dec));
-        FPUTS("<table><tr>", fp);
+
+        if (bt_req_depth) /* need borders to distinguish multi-level traces */
+                FPUTS("<table\nborder=1><tr>", fp);
+        else /* save screen space if only tracing one line */
+                FPUTS("<table><tr>", fp);
 
         int (*cmp)(const void *, const void *) = NULL;
         for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
@@ -603,7 +616,11 @@ static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
                         hsl->bytes, hsl->allocations, hsl->frees,
                         hsl->live, hsl->mean_life, hsl->max_life);
                 FPUTS("<td><a\nhref=\"../at/", fp);
-                write_uri(fp, hsl->loc_name, hsl->lname_len);
+
+                /* yes, we're writing our memory addresses into the URI */
+                write_b64_url(fp, (const void *)&hsl->sl->f,
+                                src_loc_hash_len(hsl->sl));
+
                 FPUTS("\">", fp);
                 write_html(fp, hsl->loc_name, hsl->lname_len);
                 FPUTS("</a></td></tr>", fp);
@@ -1025,6 +1042,7 @@ static void *h1d_run(void *x) /* pthread_create cb */
         int rc;
         struct mw_h1 *h1, *nxt;
         enum mw_qev ev;
+        locating = 1; /* don't report our own memory use */
 
         for (; uatomic_read(&h1d->alive); ) {
                 if (poll_add(h1d, h1d->lfd, POLLIN))
diff --git a/t/mwrap-httpd.t b/t/mwrap-httpd.t
index 73b704e..aec1780 100644
--- a/t/mwrap-httpd.t
+++ b/t/mwrap-httpd.t
@@ -54,20 +54,17 @@ is(send($c, 'GET', MSG_NOSIGNAL), 3, 'trickled 3 bytes') or diag "send: $!";
 my $cout = "$mwrap_tmp/cout";
 my $rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout,
                 "http://0/$pid/each/2000");
+my $curl_unix;
 SKIP: {
         skip 'curl lacks --unix-socket support', 1 if $rc == 512;
         is($rc, 0, 'curl /each');
         unlink($cout);
+        $curl_unix = 1;
 
         $rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout,
                 "http://0/$pid/each/2000");
         is($rc, 0, 'curl /each');
         unlink($cout);
-
-        skip 'no reset on if interactive', 1 if $ENV{INTERACTIVE};
-        $rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout,
-                '-d', 'x=y', "http://0/$pid/reset");
-        is($rc, 0, 'curl /reset');
 };
 
 {
@@ -112,7 +109,7 @@ SKIP: {
                 ok($t);
                 $res = $http->get("http://$h:$p/$pid/at/$1");
                 ok($res->{success}, '/at/$LOCATION endpoint');
-                like($res->{content}, qr!\blive allocations at !,
+                like($res->{content}, qr!\blive allocations at\b!,
                         'live allocations shown');
         } else {
                 fail($t);
@@ -124,6 +121,14 @@ SKIP: {
         }
 }
 
+SKIP: {
+        skip 'no reset w/o curl --unix-socket', 1 if !$curl_unix;
+        $rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout,
+                '-d', 'x=y', "http://0/$pid/reset");
+        is($rc, 0, 'curl /reset');
+};
+
+
 diag slurp($cout) if $ENV{V};
 $cleanup->();
 ok(!-e $sock, 'socket unlinked after cleanup');