From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A5AD41F601 for ; Fri, 2 Dec 2022 11:17:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1669979844; bh=gQNl50HfRSwuTSzngUhVKYQQKn/x5ZQywGXJlh17AXM=; h=From:To:Subject:Date:From; b=uH3pJZaOxUGURQecyB46cU32I+akdlrrZYJ5jygJ6zTkMSfXQD0mhD1JaxdWR/AJk Jk0gL56mkm26OWjvgdUARjCYkC3XgDz0EyanzTrThkbVCTKvGU/0MD41WvGvbwL8cK g0KDYmK4Ifxr0Y5bjBft9cPIMKyM9UMGqxj/6Eb4= From: Eric Wong To: mwrap-perl@80x24.org Subject: [RFC] pool filenames into separate table Date: Fri, 2 Dec 2022 11:17:24 +0000 Message-Id: <20221202111724.822982-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This may save memory with large projects --- Mwrap.xs | 56 +++++----- mwrap_core.h | 289 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 222 insertions(+), 123 deletions(-) diff --git a/Mwrap.xs b/Mwrap.xs index 568c246..ad70ae3 100644 --- a/Mwrap.xs +++ b/Mwrap.xs @@ -16,8 +16,14 @@ static SV *location_string(struct src_loc *l) { SV *ret; - if (loc_is_addr(l)) { - char **s = backtrace_symbols((void *)l->k, 1); + if (l->f) { + ret = newSV(0); + if (l->lineno == UINT_MAX) + sv_setpvf(ret, "%s:-", l->f->fn); + else + sv_setpvf(ret, "%s:%u", l->f->fn, l->lineno); + } else { + char **s = backtrace_symbols((void *)l->bt, (int)l->bt_len); if (!s) { fprintf(stderr, "backtrace_symbols => NULL: %s\n", @@ -27,8 +33,6 @@ static SV *location_string(struct src_loc *l) ret = newSVpvn(s[0], strlen(s[0])); free(s); - } else { - ret = newSVpvn(l->k, l->capa - 1); } return ret; @@ -153,42 +157,34 @@ mwrap_get(loc) PREINIT: STRLEN len; const char *str; - struct src_loc *k = &tsd.src_loc; uintptr_t p; - struct cds_lfht_iter iter; - struct cds_lfht_node *cur; - struct cds_lfht *t; - struct src_loc *l = NULL; + struct src_loc *l; CODE: ++locating; if (!SvPOK(loc)) XSRETURN_UNDEF; str = SvPV(loc, len); - if (len > PATH_MAX) + if (len >= PATH_MAX) XSRETURN_UNDEF; if (extract_addr(str, len, (void **)&p)) { - memcpy(k->k, &p, sizeof(p)); - k->capa = 0; - hash_loc(k, sizeof(p)); + struct cds_lfht *t = CMM_LOAD_SHARED(totals); + struct src_loc *k; + + if (!t) + XSRETURN_UNDEF; + k = &tsd.src_loc; + + k->f = NULL; + k->lineno = 0; + k->bt[0] = p; + k->bt_len = 1; + hash_src_loc(k); + rcu_read_lock(); + l = src_loc_get(t, k); + rcu_read_unlock(); } else { - memcpy(k->k, str, len + 1); - k->capa = len + 1; - hash_loc(k, k->capa); + l = src_loc_lookup(str, len); } - - if (!k) - XSRETURN_UNDEF; - - rcu_read_lock(); - t = CMM_LOAD_SHARED(totals); - if (!t) goto out_unlock; - - cds_lfht_lookup(t, k->hval, loc_eq, k, &iter); - cur = cds_lfht_iter_get_node(&iter); - if (cur) - l = caa_container_of(cur, struct src_loc, hnode); -out_unlock: - rcu_read_unlock(); RETVAL = l; OUTPUT: RETVAL diff --git a/mwrap_core.h b/mwrap_core.h index 11b7346..181898f 100644 --- a/mwrap_core.h +++ b/mwrap_core.h @@ -9,6 +9,11 @@ # define MWRAP_PERL 0 #endif +#ifndef MWRAP_BT_MAX +# define MWRAP_BT_MAX 32 +#endif + + #if MWRAP_PERL # include "EXTERN.h" # include "perl.h" @@ -67,7 +72,7 @@ static MWRAP_TSD size_t locating; #ifndef PERL_IMPLICIT_CONTEXT static size_t *root_locating; /* determines if PL_curcop is our thread */ #endif -static struct cds_lfht *totals; +static struct cds_lfht *files, *totals; union padded_mutex { pthread_mutex_t mtx; char pad[64]; /* cache alignment for common CPUs */ @@ -88,14 +93,19 @@ static union padded_mutex mutexes[MUTEX_NR] = { #endif }; +#ifdef static_assert +/* we only use uint32_t for pathname storage for struct alignment */ +static_assert(UINT32_MAX > PATH_MAX, "UINT32_MAX > PATH_MAX"); +#endif + static pthread_mutex_t *mutex_assign(void) { return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx; } -static struct cds_lfht *lfht_new(void) +static struct cds_lfht *lfht_new(size_t size) { - return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0); + return cds_lfht_new(size, 1, 0, CDS_LFHT_AUTO_RESIZE, 0); } static void reset_mutexes(void) @@ -118,32 +128,15 @@ static void *my_mempcpy(void *dest, const void *src, size_t n) #define RETURN_ADDRESS(nr) \ (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr))) -#define UINT2STR_MAX (sizeof(unsigned) == 4 ? 10 : 19) -static char *uint2str(unsigned num, char *dst, size_t *size) -{ - if (num <= 9) { - *size -= 1; - *dst++ = (char)(num + '0'); - return dst; - } else { - char buf[UINT2STR_MAX]; - char *end = buf + sizeof(buf); - char *p = end; - size_t adj; - - do { - *size -= 1; - *--p = (char)((num % 10) + '0'); - num /= 10; - } while (num && *size); - - if (!num) { - adj = end - p; - return mempcpy(dst, p, adj); - } - } - return NULL; -} +/* + * only for interpreted sources (Perl/Ruby/etc), not backtrace_symbols files + */ +struct src_file { + struct cds_lfht_node nd; /* <=> files table */ + uint32_t fn_hash; + uint32_t fn_len; /* < PATH_MAX */ + char fn[]; /* NUL-terminated */ +}; /* allocated via real_malloc, immortal for safety reasons */ struct src_loc { @@ -154,11 +147,14 @@ struct src_loc { size_t frees; size_t age_total; /* (age_total / frees) => mean age at free */ size_t max_lifespan; - struct cds_lfht_node hnode; + struct cds_lfht_node hnode; /* <=> totals table */ struct cds_list_head allocs; /* <=> alloc_hdr.node */ - uint32_t hval; - uint32_t capa; - char k[]; + uint32_t loc_hash; + uint32_t bt_len; + /* next 3 fields contiguous for hash_src_loc(): */ + struct src_file *f; + size_t lineno; + uintptr_t bt[]; }; /* @@ -178,11 +174,10 @@ struct alloc_hdr { size_t size; }; -/* $PATHNAME:$LINENO */ static MWRAP_TSD union { - char kbuf[ - sizeof(struct src_loc) + PATH_MAX + sizeof(":") + UINT2STR_MAX - ]; + char kbuf[sizeof(struct src_file) + PATH_MAX]; + char btbuf[sizeof(struct src_loc) + sizeof(uintptr_t) * MWRAP_BT_MAX]; + struct src_file src_file; struct src_loc src_loc; } tsd; @@ -198,12 +193,12 @@ static void *hdr2ptr(struct alloc_hdr *h) static int loc_is_addr(const struct src_loc *l) { - return l->capa == 0; + return l->f == NULL; } -static size_t loc_size(const struct src_loc *l) +static size_t bt_bytelen(const struct src_loc *l) { - return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa; + return sizeof(l->bt[0]) * l->bt_len; } static int loc_eq(struct cds_lfht_node *node, const void *key) @@ -213,33 +208,53 @@ static int loc_eq(struct cds_lfht_node *node, const void *key) existing = caa_container_of(node, struct src_loc, hnode); - return (k->hval == existing->hval && - k->capa == existing->capa && - memcmp(k->k, existing->k, loc_size(k)) == 0); + return (k->f == existing->f && + k->lineno == existing->lineno && + k->bt_len == existing->bt_len && + !memcmp(k->bt, existing->bt, bt_bytelen(k))); +} + +static int fn_eq(struct cds_lfht_node *node, const void *key) +{ + const struct src_file *existing; + const struct src_file *k = key; + + existing = caa_container_of(node, struct src_file, nd); + + return (k->fn_len == existing->fn_len && + !memcmp(k->fn, existing->fn, k->fn_len)); } -static struct src_loc *totals_add_rcu(struct src_loc *k) +static struct src_loc *src_loc_get(struct cds_lfht *t, const struct src_loc *k) { struct cds_lfht_iter iter; struct cds_lfht_node *cur; - struct src_loc *l = 0; - struct cds_lfht *t; -again: - t = CMM_LOAD_SHARED(totals); - if (!t) return l; mwrap_assert(rcu_read_ongoing()); - cds_lfht_lookup(t, k->hval, loc_eq, k, &iter); + cds_lfht_lookup(t, k->loc_hash, loc_eq, k, &iter); cur = cds_lfht_iter_get_node(&iter); - if (cur) { - l = caa_container_of(cur, struct src_loc, hnode); + return cur ? caa_container_of(cur, struct src_loc, hnode) : NULL; +} + +static struct src_loc *totals_add_rcu(const struct src_loc *k) +{ + struct src_loc *l; + struct cds_lfht *t = CMM_LOAD_SHARED(totals); + if (!t) return NULL; + +again: + l = src_loc_get(t, k); + if (l) { uatomic_add(&l->total, k->total); uatomic_add(&l->allocations, 1); } else { - size_t n = loc_size(k); - l = real_malloc(sizeof(*l) + n); + size_t n = bt_bytelen(k) + sizeof(*k); + struct cds_lfht_node *cur; + + l = real_malloc(n); if (!l) return l; - memcpy(l, k, sizeof(*l) + n); + memcpy(l, k, n); + l->freed_bytes = 0; l->mtx = mutex_assign(); l->age_total = 0; l->max_lifespan = 0; @@ -247,7 +262,7 @@ again: l->frees = 0; l->allocations = 1; CDS_INIT_LIST_HEAD(&l->allocs); - cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode); + cur = cds_lfht_add_unique(t, l->loc_hash, loc_eq, l, &l->hnode); if (cur != &l->hnode) { /* lost race */ rcu_read_unlock(); real_free(l); @@ -277,48 +292,84 @@ static const COP *mwp_curcop(void) return NULL; } -static void hash_loc(struct src_loc *k, size_t len) +static uint32_t do_hash(const void *p, size_t len) { #if defined(XXH3_64bits) union { XXH64_hash_t u64; uint32_t u32[2]; } u; - u.u64 = XXH3_64bits(k->k, len); - k->hval = u.u32[1]; + u.u64 = XXH3_64bits(p, len); + return u.u32[1]; #else - k->hval = jhash(k->k, len, 0xdeadbeef); + return jhash(p, len, 0xdeadbeef); #endif } -static struct src_loc *assign_line(size_t size, const char *file, unsigned line) +static void hash_src_loc(struct src_loc *l) +{ + l->loc_hash = do_hash(&l->f, sizeof(l->f) + sizeof(l->lineno) + + + bt_bytelen(l)); +} + +static struct src_file *src_file_get(struct cds_lfht *t, struct src_file *k, + const char *fn, size_t fn_len) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *cur; + + mwrap_assert(t); /* caller should've bailed if missing */ + if (fn_len >= PATH_MAX) + return NULL; + k->fn_len = (uint32_t)fn_len; + memcpy(k->fn, fn, fn_len); + k->fn[fn_len] = 0; + k->fn_hash = do_hash(k->fn, fn_len); + mwrap_assert(rcu_read_ongoing()); + cds_lfht_lookup(t, k->fn_hash, fn_eq, k, &iter); + cur = cds_lfht_iter_get_node(&iter); + + return cur ? caa_container_of(cur, struct src_file, nd) : NULL; +} + +static struct src_loc *assign_line(size_t size, const char *fn, unsigned lineno) { /* avoid vsnprintf or anything which could call malloc here: */ size_t len; - struct src_loc *k; - char *dst; - size_t uint_size = UINT2STR_MAX; + struct src_file *f; + struct src_file *k = &tsd.src_file; + struct src_loc *l; + struct cds_lfht_node *cur; + struct cds_lfht *t = CMM_LOAD_SHARED(files); + + mwrap_assert(t); - if (!file) + if (!fn) return NULL; - len = strlen(file); - if (len > PATH_MAX) - len = PATH_MAX; - k = &tsd.src_loc; - k->total = size; - dst = mempcpy(k->k, file, len); - *dst++ = ':'; - - if (line == UINT_MAX) /* no line number */ - *dst++ = '-'; - else - dst = uint2str(line, dst, &uint_size); - - mwrap_assert(dst && "bad math"); - *dst = 0; /* terminate string */ - k->capa = (uint32_t)(dst - k->k + 1); - hash_loc(k, k->capa); - return totals_add_rcu(k); + len = strlen(fn); + if (len >= PATH_MAX) + len = PATH_MAX - 1; +again: + f = src_file_get(t, k, fn, len); + if (!f) { /* doesn't exist, add a new one */ + f = real_malloc(sizeof(*f) + len + 1); + if (!f) return NULL; + memcpy(f, k, sizeof(*f) + len + 1); + cur = cds_lfht_add_unique(t, f->fn_hash, fn_eq, f, &f->nd); + if (cur != &f->nd) { /* lost race */ + rcu_read_unlock(); + real_free(f); + rcu_read_lock(); + goto again; + } + } + l = &tsd.src_loc; + l->total = size; + l->f = f; + l->lineno = lineno; + l->bt_len = 0; + hash_src_loc(l); + return totals_add_rcu(l); } static struct src_loc * @@ -338,12 +389,14 @@ update_stats_rcu_lock(size_t *generation, size_t size, uintptr_t caller) if (cop) ret = assign_line(size, OutCopFILE(cop), CopLINE(cop)); #endif /* MWRAP_PERL */ - if (!ret) { + if (!ret) { /* no associated Perl code, just C/C++ */ k = &tsd.src_loc; k->total = size; - memcpy(k->k, &caller, sizeof(caller)); - k->capa = 0; - hash_loc(k, sizeof(caller)); + k->f = NULL; + k->lineno = 0; + k->bt[0] = caller; + k->bt_len = 1; + hash_src_loc(k); ret = totals_add_rcu(k); } out: @@ -633,22 +686,25 @@ static void *dump_to_file(struct dump_arg *a) t = CMM_LOAD_SHARED(totals); if (!t) goto out_unlock; + cds_lfht_for_each_entry(t, &iter, l, hnode) { - const void *p = l->k; if (l->total <= a->min) continue; if (loc_is_addr(l)) { - char **s = backtrace_symbols(p, 1); + char **s = backtrace_symbols((void **)l->bt, 1); + if (s) { - p = s[0]; + fprintf(a->fp, "%16zu %12zu %s\n", + l->total, l->allocations, s[0]); free(s); } else { fprintf(stderr, "backtrace_symbols: %s\n", strerror(errno)); } + } else { + fprintf(a->fp, "%16zu %12zu %s:%u\n", + l->total, l->allocations, l->f->fn, l->lineno); } - fprintf(a->fp, "%16zu %12zu %s\n", - l->total, l->allocations, (const char *)p); } out_unlock: rcu_read_unlock(); @@ -666,6 +722,50 @@ static int extract_addr(const char *str, size_t len, void **p) #endif } +/* str is $PATHNAME:$LINENO, len is strlen(str) */ +static struct src_loc *src_loc_lookup(const char *str, size_t len) +{ + char *c = memrchr(str, ':', len); + const char *end = str + len; + unsigned lineno; + size_t fn_len; + struct src_file *f; + struct src_loc *k, *l = NULL; + struct cds_lfht *t = CMM_LOAD_SHARED(files); + + mwrap_assert(str[len] == 0); + if (!c || c == end || !t) + return NULL; + + fn_len = c - str; + c++; + if (*c == '-') { + lineno = UINT_MAX; + } else { + lineno = 0; + for (; *c; c++) { + if (*c < '0' || *c > '9') + return NULL; + lineno *= 10; + lineno += (*c - '0'); + } + } + rcu_read_lock(); + f = src_file_get(t, &tsd.src_file, str, fn_len); + t = CMM_LOAD_SHARED(totals); + if (f && t) { + struct src_loc *k = &tsd.src_loc; + + k->f = f; + k->lineno = lineno; + k->bt_len = 0; + hash_src_loc(k); + l = src_loc_get(t, k); + } + rcu_read_unlock(); + return l; +} + #ifndef O_CLOEXEC # define O_CLOEXEC 0 #endif @@ -753,7 +853,10 @@ __attribute__((constructor)) static void mwrap_ctor(void) if (!STATIC_MTX_INIT_OK) reset_mutexes(); /* initialize mutexes used by urcu-bp */ - CMM_STORE_SHARED(totals, lfht_new()); + CMM_STORE_SHARED(files, lfht_new(256)); + if (!CMM_LOAD_SHARED(files)) + fprintf(stderr, "failed to allocate files table\n"); + CMM_STORE_SHARED(totals, lfht_new(16384)); if (!CMM_LOAD_SHARED(totals)) fprintf(stderr, "failed to allocate totals table\n"); h = real_malloc(sizeof(struct alloc_hdr));