mwrap (Perl version) user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed
* [RFC] pool filenames into separate table
@ 2022-12-02 11:17 Eric Wong
  2022-12-09 19:32 ` [PATCH v2] " Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2022-12-02 11:17 UTC (permalink / raw)
  To: mwrap-perl

This may save memory with large projects
---
 Mwrap.xs     |  56 +++++-----
 mwrap_core.h | 289 ++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 222 insertions(+), 123 deletions(-)

diff --git a/Mwrap.xs b/Mwrap.xs
index 568c246..ad70ae3 100644
--- a/Mwrap.xs
+++ b/Mwrap.xs
@@ -16,8 +16,14 @@ static SV *location_string(struct src_loc *l)
 {
 	SV *ret;
 
-	if (loc_is_addr(l)) {
-		char **s = backtrace_symbols((void *)l->k, 1);
+	if (l->f) {
+		ret = newSV(0);
+		if (l->lineno == UINT_MAX)
+			sv_setpvf(ret, "%s:-", l->f->fn);
+		else
+			sv_setpvf(ret, "%s:%u", l->f->fn, l->lineno);
+	} else {
+		char **s = backtrace_symbols((void *)l->bt, (int)l->bt_len);
 
 		if (!s) {
 			fprintf(stderr, "backtrace_symbols => NULL: %s\n",
@@ -27,8 +33,6 @@ static SV *location_string(struct src_loc *l)
 
 		ret = newSVpvn(s[0], strlen(s[0]));
 		free(s);
-	} else {
-		ret = newSVpvn(l->k, l->capa - 1);
 	}
 
 	return ret;
@@ -153,42 +157,34 @@ mwrap_get(loc)
 PREINIT:
 	STRLEN len;
 	const char *str;
-	struct src_loc *k = &tsd.src_loc;
 	uintptr_t p;
-	struct cds_lfht_iter iter;
-	struct cds_lfht_node *cur;
-	struct cds_lfht *t;
-	struct src_loc *l = NULL;
+	struct src_loc *l;
 CODE:
 	++locating;
 	if (!SvPOK(loc))
 		XSRETURN_UNDEF;
 	str = SvPV(loc, len);
-	if (len > PATH_MAX)
+	if (len >= PATH_MAX)
 		XSRETURN_UNDEF;
 	if (extract_addr(str, len, (void **)&p)) {
-		memcpy(k->k, &p, sizeof(p));
-		k->capa = 0;
-		hash_loc(k, sizeof(p));
+		struct cds_lfht *t = CMM_LOAD_SHARED(totals);
+		struct src_loc *k;
+
+		if (!t)
+			XSRETURN_UNDEF;
+		k = &tsd.src_loc;
+
+		k->f = NULL;
+		k->lineno = 0;
+		k->bt[0] = p;
+		k->bt_len = 1;
+		hash_src_loc(k);
+		rcu_read_lock();
+		l = src_loc_get(t, k);
+		rcu_read_unlock();
 	} else {
-		memcpy(k->k, str, len + 1);
-		k->capa = len + 1;
-		hash_loc(k, k->capa);
+		l = src_loc_lookup(str, len);
 	}
-
-	if (!k)
-		XSRETURN_UNDEF;
-
-	rcu_read_lock();
-	t = CMM_LOAD_SHARED(totals);
-	if (!t) goto out_unlock;
-
-	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
-	cur = cds_lfht_iter_get_node(&iter);
-	if (cur)
-		l = caa_container_of(cur, struct src_loc, hnode);
-out_unlock:
-	rcu_read_unlock();
 	RETVAL = l;
 OUTPUT:
 	RETVAL
diff --git a/mwrap_core.h b/mwrap_core.h
index 11b7346..181898f 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -9,6 +9,11 @@
 #	define MWRAP_PERL 0
 #endif
 
+#ifndef MWRAP_BT_MAX
+#	define	MWRAP_BT_MAX 32
+#endif
+
+
 #if MWRAP_PERL
 #	include "EXTERN.h"
 #	include "perl.h"
@@ -67,7 +72,7 @@ static MWRAP_TSD size_t locating;
 #ifndef PERL_IMPLICIT_CONTEXT
 static size_t *root_locating; /* determines if PL_curcop is our thread */
 #endif
-static struct cds_lfht *totals;
+static struct cds_lfht *files, *totals;
 union padded_mutex {
 	pthread_mutex_t mtx;
 	char pad[64]; /* cache alignment for common CPUs */
@@ -88,14 +93,19 @@ static union padded_mutex mutexes[MUTEX_NR] = {
 #endif
 };
 
+#ifdef static_assert
+/* we only use uint32_t for pathname storage for struct alignment */
+static_assert(UINT32_MAX > PATH_MAX, "UINT32_MAX > PATH_MAX");
+#endif
+
 static pthread_mutex_t *mutex_assign(void)
 {
 	return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
 }
 
-static struct cds_lfht *lfht_new(void)
+static struct cds_lfht *lfht_new(size_t size)
 {
-	return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
+	return cds_lfht_new(size, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
 }
 
 static void reset_mutexes(void)
@@ -118,32 +128,15 @@ static void *my_mempcpy(void *dest, const void *src, size_t n)
 #define RETURN_ADDRESS(nr) \
   (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
 
-#define UINT2STR_MAX (sizeof(unsigned) == 4 ? 10 : 19)
-static char *uint2str(unsigned num, char *dst, size_t *size)
-{
-	if (num <= 9) {
-		*size -= 1;
-		*dst++ = (char)(num + '0');
-		return dst;
-	} else {
-		char buf[UINT2STR_MAX];
-		char *end = buf + sizeof(buf);
-		char *p = end;
-		size_t adj;
-
-		do {
-			*size -= 1;
-			*--p = (char)((num % 10) + '0');
-			num /= 10;
-		} while (num && *size);
-
-		if (!num) {
-			adj = end - p;
-			return mempcpy(dst, p, adj);
-		}
-	}
-	return NULL;
-}
+/*
+ * only for interpreted sources (Perl/Ruby/etc), not backtrace_symbols files
+ */
+struct src_file {
+	struct cds_lfht_node nd; /* <=> files table */
+	uint32_t fn_hash;
+	uint32_t fn_len; /* < PATH_MAX */
+	char fn[]; /* NUL-terminated */
+};
 
 /* allocated via real_malloc, immortal for safety reasons */
 struct src_loc {
@@ -154,11 +147,14 @@ struct src_loc {
 	size_t frees;
 	size_t age_total; /* (age_total / frees) => mean age at free */
 	size_t max_lifespan;
-	struct cds_lfht_node hnode;
+	struct cds_lfht_node hnode; /* <=> totals table */
 	struct cds_list_head allocs; /* <=> alloc_hdr.node */
-	uint32_t hval;
-	uint32_t capa;
-	char k[];
+	uint32_t loc_hash;
+	uint32_t bt_len;
+	/* next 3 fields contiguous for hash_src_loc(): */
+	struct src_file *f;
+	size_t lineno;
+	uintptr_t bt[];
 };
 
 /*
@@ -178,11 +174,10 @@ struct alloc_hdr {
 	size_t size;
 };
 
-/* $PATHNAME:$LINENO */
 static MWRAP_TSD union {
-	char kbuf[
-		sizeof(struct src_loc) + PATH_MAX + sizeof(":") + UINT2STR_MAX
-	];
+	char kbuf[sizeof(struct src_file) + PATH_MAX];
+	char btbuf[sizeof(struct src_loc) + sizeof(uintptr_t) * MWRAP_BT_MAX];
+	struct src_file src_file;
 	struct src_loc src_loc;
 } tsd;
 
@@ -198,12 +193,12 @@ static void *hdr2ptr(struct alloc_hdr *h)
 
 static int loc_is_addr(const struct src_loc *l)
 {
-	return l->capa == 0;
+	return l->f == NULL;
 }
 
-static size_t loc_size(const struct src_loc *l)
+static size_t bt_bytelen(const struct src_loc *l)
 {
-	return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa;
+	return sizeof(l->bt[0]) * l->bt_len;
 }
 
 static int loc_eq(struct cds_lfht_node *node, const void *key)
@@ -213,33 +208,53 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
 
 	existing = caa_container_of(node, struct src_loc, hnode);
 
-	return (k->hval == existing->hval &&
-		k->capa == existing->capa &&
-		memcmp(k->k, existing->k, loc_size(k)) == 0);
+	return (k->f == existing->f &&
+		k->lineno == existing->lineno &&
+		k->bt_len == existing->bt_len &&
+		!memcmp(k->bt, existing->bt, bt_bytelen(k)));
+}
+
+static int fn_eq(struct cds_lfht_node *node, const void *key)
+{
+	const struct src_file *existing;
+	const struct src_file *k = key;
+
+	existing = caa_container_of(node, struct src_file, nd);
+
+	return (k->fn_len == existing->fn_len &&
+		!memcmp(k->fn, existing->fn, k->fn_len));
 }
 
-static struct src_loc *totals_add_rcu(struct src_loc *k)
+static struct src_loc *src_loc_get(struct cds_lfht *t, const struct src_loc *k)
 {
 	struct cds_lfht_iter iter;
 	struct cds_lfht_node *cur;
-	struct src_loc *l = 0;
-	struct cds_lfht *t;
 
-again:
-	t = CMM_LOAD_SHARED(totals);
-	if (!t) return l;
 	mwrap_assert(rcu_read_ongoing());
-	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+	cds_lfht_lookup(t, k->loc_hash, loc_eq, k, &iter);
 	cur = cds_lfht_iter_get_node(&iter);
-	if (cur) {
-		l = caa_container_of(cur, struct src_loc, hnode);
+	return cur ? caa_container_of(cur, struct src_loc, hnode) : NULL;
+}
+
+static struct src_loc *totals_add_rcu(const struct src_loc *k)
+{
+	struct src_loc *l;
+	struct cds_lfht *t = CMM_LOAD_SHARED(totals);
+	if (!t) return NULL;
+
+again:
+	l = src_loc_get(t, k);
+	if (l) {
 		uatomic_add(&l->total, k->total);
 		uatomic_add(&l->allocations, 1);
 	} else {
-		size_t n = loc_size(k);
-		l = real_malloc(sizeof(*l) + n);
+		size_t n = bt_bytelen(k) + sizeof(*k);
+		struct cds_lfht_node *cur;
+
+		l = real_malloc(n);
 		if (!l) return l;
-		memcpy(l, k, sizeof(*l) + n);
+		memcpy(l, k, n);
+		l->freed_bytes = 0;
 		l->mtx = mutex_assign();
 		l->age_total = 0;
 		l->max_lifespan = 0;
@@ -247,7 +262,7 @@ again:
 		l->frees = 0;
 		l->allocations = 1;
 		CDS_INIT_LIST_HEAD(&l->allocs);
-		cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
+		cur = cds_lfht_add_unique(t, l->loc_hash, loc_eq, l, &l->hnode);
 		if (cur != &l->hnode) { /* lost race */
 			rcu_read_unlock();
 			real_free(l);
@@ -277,48 +292,84 @@ static const COP *mwp_curcop(void)
 	return NULL;
 }
 
-static void hash_loc(struct src_loc *k, size_t len)
+static uint32_t do_hash(const void *p, size_t len)
 {
 #if defined(XXH3_64bits)
 	union {
 		XXH64_hash_t u64;
 		uint32_t u32[2];
 	} u;
-	u.u64 = XXH3_64bits(k->k, len);
-	k->hval = u.u32[1];
+	u.u64 = XXH3_64bits(p, len);
+	return u.u32[1];
 #else
-	k->hval = jhash(k->k, len, 0xdeadbeef);
+	return jhash(p, len, 0xdeadbeef);
 #endif
 }
 
-static struct src_loc *assign_line(size_t size, const char *file, unsigned line)
+static void hash_src_loc(struct src_loc *l)
+{
+	l->loc_hash = do_hash(&l->f, sizeof(l->f) + sizeof(l->lineno) +
+				+ bt_bytelen(l));
+}
+
+static struct src_file *src_file_get(struct cds_lfht *t, struct src_file *k,
+					const char *fn, size_t fn_len)
+{
+	struct cds_lfht_iter iter;
+	struct cds_lfht_node *cur;
+
+	mwrap_assert(t); /* caller should've bailed if missing */
+	if (fn_len >= PATH_MAX)
+		return NULL;
+	k->fn_len = (uint32_t)fn_len;
+	memcpy(k->fn, fn, fn_len);
+	k->fn[fn_len] = 0;
+	k->fn_hash = do_hash(k->fn, fn_len);
+	mwrap_assert(rcu_read_ongoing());
+	cds_lfht_lookup(t, k->fn_hash, fn_eq, k, &iter);
+	cur = cds_lfht_iter_get_node(&iter);
+
+	return cur ? caa_container_of(cur, struct src_file, nd) : NULL;
+}
+
+static struct src_loc *assign_line(size_t size, const char *fn, unsigned lineno)
 {
 	/* avoid vsnprintf or anything which could call malloc here: */
 	size_t len;
-	struct src_loc *k;
-	char *dst;
-	size_t uint_size = UINT2STR_MAX;
+	struct src_file *f;
+	struct src_file *k = &tsd.src_file;
+	struct src_loc *l;
+	struct cds_lfht_node *cur;
+	struct cds_lfht *t = CMM_LOAD_SHARED(files);
+
+	mwrap_assert(t);
 
-	if (!file)
+	if (!fn)
 		return NULL;
-	len = strlen(file);
-	if (len > PATH_MAX)
-		len = PATH_MAX;
-	k = &tsd.src_loc;
-	k->total = size;
-	dst = mempcpy(k->k, file, len);
-	*dst++ = ':';
-
-	if (line == UINT_MAX) /* no line number */
-		*dst++ = '-';
-	else
-		dst = uint2str(line, dst, &uint_size);
-
-	mwrap_assert(dst && "bad math");
-	*dst = 0;	/* terminate string */
-	k->capa = (uint32_t)(dst - k->k + 1);
-	hash_loc(k, k->capa);
-	return totals_add_rcu(k);
+	len = strlen(fn);
+	if (len >= PATH_MAX)
+		len = PATH_MAX - 1;
+again:
+	f = src_file_get(t, k, fn, len);
+	if (!f) { /* doesn't exist, add a new one */
+		f = real_malloc(sizeof(*f) + len + 1);
+		if (!f) return NULL;
+		memcpy(f, k, sizeof(*f) + len + 1);
+		cur = cds_lfht_add_unique(t, f->fn_hash, fn_eq, f, &f->nd);
+		if (cur != &f->nd) { /* lost race */
+			rcu_read_unlock();
+			real_free(f);
+			rcu_read_lock();
+			goto again;
+		}
+	}
+	l = &tsd.src_loc;
+	l->total = size;
+	l->f = f;
+	l->lineno = lineno;
+	l->bt_len = 0;
+	hash_src_loc(l);
+	return totals_add_rcu(l);
 }
 
 static struct src_loc *
@@ -338,12 +389,14 @@ update_stats_rcu_lock(size_t *generation, size_t size, uintptr_t caller)
 	if (cop)
 		ret = assign_line(size, OutCopFILE(cop), CopLINE(cop));
 #endif /* MWRAP_PERL */
-	if (!ret) {
+	if (!ret) { /* no associated Perl code, just C/C++ */
 		k = &tsd.src_loc;
 		k->total = size;
-		memcpy(k->k, &caller, sizeof(caller));
-		k->capa = 0;
-		hash_loc(k, sizeof(caller));
+		k->f = NULL;
+		k->lineno = 0;
+		k->bt[0] = caller;
+		k->bt_len = 1;
+		hash_src_loc(k);
 		ret = totals_add_rcu(k);
 	}
 out:
@@ -633,22 +686,25 @@ static void *dump_to_file(struct dump_arg *a)
 	t = CMM_LOAD_SHARED(totals);
 	if (!t)
 		goto out_unlock;
+
 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
-		const void *p = l->k;
 		if (l->total <= a->min) continue;
 
 		if (loc_is_addr(l)) {
-			char **s = backtrace_symbols(p, 1);
+			char **s = backtrace_symbols((void **)l->bt, 1);
+
 			if (s) {
-				p = s[0];
+				fprintf(a->fp, "%16zu %12zu %s\n",
+					l->total, l->allocations, s[0]);
 				free(s);
 			} else {
 				fprintf(stderr, "backtrace_symbols: %s\n",
 					strerror(errno));
 			}
+		} else {
+			fprintf(a->fp, "%16zu %12zu %s:%u\n",
+				l->total, l->allocations, l->f->fn, l->lineno);
 		}
-		fprintf(a->fp, "%16zu %12zu %s\n",
-			l->total, l->allocations, (const char *)p);
 	}
 out_unlock:
 	rcu_read_unlock();
@@ -666,6 +722,50 @@ static int extract_addr(const char *str, size_t len, void **p)
 #endif
 }
 
+/* str is $PATHNAME:$LINENO, len is strlen(str) */
+static struct src_loc *src_loc_lookup(const char *str, size_t len)
+{
+	char *c = memrchr(str, ':', len);
+	const char *end = str + len;
+	unsigned lineno;
+	size_t fn_len;
+	struct src_file *f;
+	struct src_loc *k, *l = NULL;
+	struct cds_lfht *t = CMM_LOAD_SHARED(files);
+
+	mwrap_assert(str[len] == 0);
+	if (!c || c == end || !t)
+		return NULL;
+
+	fn_len = c - str;
+	c++;
+	if (*c == '-') {
+		lineno = UINT_MAX;
+	} else {
+		lineno = 0;
+		for (; *c; c++) {
+			if (*c < '0' || *c > '9')
+				return NULL;
+			lineno *= 10;
+			lineno += (*c - '0');
+		}
+	}
+	rcu_read_lock();
+	f = src_file_get(t, &tsd.src_file, str, fn_len);
+	t = CMM_LOAD_SHARED(totals);
+	if (f && t) {
+		struct src_loc *k = &tsd.src_loc;
+
+		k->f = f;
+		k->lineno = lineno;
+		k->bt_len = 0;
+		hash_src_loc(k);
+		l = src_loc_get(t, k);
+	}
+	rcu_read_unlock();
+	return l;
+}
+
 #ifndef O_CLOEXEC
 #  define O_CLOEXEC 0
 #endif
@@ -753,7 +853,10 @@ __attribute__((constructor)) static void mwrap_ctor(void)
 	if (!STATIC_MTX_INIT_OK)
 		reset_mutexes();
 	/* initialize mutexes used by urcu-bp */
-	CMM_STORE_SHARED(totals, lfht_new());
+	CMM_STORE_SHARED(files, lfht_new(256));
+	if (!CMM_LOAD_SHARED(files))
+		fprintf(stderr, "failed to allocate files table\n");
+	CMM_STORE_SHARED(totals, lfht_new(16384));
 	if (!CMM_LOAD_SHARED(totals))
 		fprintf(stderr, "failed to allocate totals table\n");
 	h = real_malloc(sizeof(struct alloc_hdr));

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH v2] pool filenames into separate table
  2022-12-02 11:17 [RFC] pool filenames into separate table Eric Wong
@ 2022-12-09 19:32 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2022-12-09 19:32 UTC (permalink / raw)
  To: mwrap-perl

This will save memory with large projects where files have many
allocation sites.  More importantly, the internal change will
make supporting deeper C backtraces (via backtrace(3)) possible
in the future.
---
Range-diff:
1:  df25132 ! 1:  8b5da49 pool filenames into separate table
    @@ Mwrap.xs: static SV *location_string(struct src_loc *l)
     +		if (l->lineno == UINT_MAX)
     +			sv_setpvf(ret, "%s:-", l->f->fn);
     +		else
    -+			sv_setpvf(ret, "%s:%u", l->f->fn, l->lineno);
    ++			sv_setpvf(ret, "%s:%zu", l->f->fn, l->lineno);
     +	} else {
     +		char **s = backtrace_symbols((void *)l->bt, (int)l->bt_len);
      
    @@ mwrap_core.h: struct src_loc {
     +	uint32_t bt_len;
     +	/* next 3 fields contiguous for hash_src_loc(): */
     +	struct src_file *f;
    ++	/* .lineno only needs `unsigned', but is size_t for alignment */
     +	size_t lineno;
     +	uintptr_t bt[];
      };
    @@ mwrap_core.h: static void *dump_to_file(struct dump_arg *a)
      					strerror(errno));
      			}
     +		} else {
    -+			fprintf(a->fp, "%16zu %12zu %s:%u\n",
    ++			fprintf(a->fp, "%16zu %12zu %s:%zu\n",
     +				l->total, l->allocations, l->f->fn, l->lineno);
      		}
     -		fprintf(a->fp, "%16zu %12zu %s\n",
    @@ mwrap_core.h: static int extract_addr(const char *str, size_t len, void **p)
     +	unsigned lineno;
     +	size_t fn_len;
     +	struct src_file *f;
    -+	struct src_loc *k, *l = NULL;
    ++	struct src_loc *l = NULL;
     +	struct cds_lfht *t = CMM_LOAD_SHARED(files);
     +
     +	mwrap_assert(str[len] == 0);

 Mwrap.xs     |  56 +++++-----
 mwrap_core.h | 290 ++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 223 insertions(+), 123 deletions(-)

diff --git a/Mwrap.xs b/Mwrap.xs
index 568c246..0c00914 100644
--- a/Mwrap.xs
+++ b/Mwrap.xs
@@ -16,8 +16,14 @@ static SV *location_string(struct src_loc *l)
 {
 	SV *ret;
 
-	if (loc_is_addr(l)) {
-		char **s = backtrace_symbols((void *)l->k, 1);
+	if (l->f) {
+		ret = newSV(0);
+		if (l->lineno == UINT_MAX)
+			sv_setpvf(ret, "%s:-", l->f->fn);
+		else
+			sv_setpvf(ret, "%s:%zu", l->f->fn, l->lineno);
+	} else {
+		char **s = backtrace_symbols((void *)l->bt, (int)l->bt_len);
 
 		if (!s) {
 			fprintf(stderr, "backtrace_symbols => NULL: %s\n",
@@ -27,8 +33,6 @@ static SV *location_string(struct src_loc *l)
 
 		ret = newSVpvn(s[0], strlen(s[0]));
 		free(s);
-	} else {
-		ret = newSVpvn(l->k, l->capa - 1);
 	}
 
 	return ret;
@@ -153,42 +157,34 @@ mwrap_get(loc)
 PREINIT:
 	STRLEN len;
 	const char *str;
-	struct src_loc *k = &tsd.src_loc;
 	uintptr_t p;
-	struct cds_lfht_iter iter;
-	struct cds_lfht_node *cur;
-	struct cds_lfht *t;
-	struct src_loc *l = NULL;
+	struct src_loc *l;
 CODE:
 	++locating;
 	if (!SvPOK(loc))
 		XSRETURN_UNDEF;
 	str = SvPV(loc, len);
-	if (len > PATH_MAX)
+	if (len >= PATH_MAX)
 		XSRETURN_UNDEF;
 	if (extract_addr(str, len, (void **)&p)) {
-		memcpy(k->k, &p, sizeof(p));
-		k->capa = 0;
-		hash_loc(k, sizeof(p));
+		struct cds_lfht *t = CMM_LOAD_SHARED(totals);
+		struct src_loc *k;
+
+		if (!t)
+			XSRETURN_UNDEF;
+		k = &tsd.src_loc;
+
+		k->f = NULL;
+		k->lineno = 0;
+		k->bt[0] = p;
+		k->bt_len = 1;
+		hash_src_loc(k);
+		rcu_read_lock();
+		l = src_loc_get(t, k);
+		rcu_read_unlock();
 	} else {
-		memcpy(k->k, str, len + 1);
-		k->capa = len + 1;
-		hash_loc(k, k->capa);
+		l = src_loc_lookup(str, len);
 	}
-
-	if (!k)
-		XSRETURN_UNDEF;
-
-	rcu_read_lock();
-	t = CMM_LOAD_SHARED(totals);
-	if (!t) goto out_unlock;
-
-	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
-	cur = cds_lfht_iter_get_node(&iter);
-	if (cur)
-		l = caa_container_of(cur, struct src_loc, hnode);
-out_unlock:
-	rcu_read_unlock();
 	RETVAL = l;
 OUTPUT:
 	RETVAL
diff --git a/mwrap_core.h b/mwrap_core.h
index 772026f..f2d5a09 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -9,6 +9,11 @@
 #	define MWRAP_PERL 0
 #endif
 
+#ifndef MWRAP_BT_MAX
+#	define	MWRAP_BT_MAX 32
+#endif
+
+
 #if MWRAP_PERL
 #	include "EXTERN.h"
 #	include "perl.h"
@@ -67,7 +72,7 @@ static MWRAP_TSD size_t locating;
 #ifndef PERL_IMPLICIT_CONTEXT
 static size_t *root_locating; /* determines if PL_curcop is our thread */
 #endif
-static struct cds_lfht *totals;
+static struct cds_lfht *files, *totals;
 union padded_mutex {
 	pthread_mutex_t mtx;
 	char pad[64]; /* cache alignment for common CPUs */
@@ -88,14 +93,19 @@ static union padded_mutex mutexes[MUTEX_NR] = {
 #endif
 };
 
+#ifdef static_assert
+/* we only use uint32_t for pathname storage for struct alignment */
+static_assert(UINT32_MAX > PATH_MAX, "UINT32_MAX > PATH_MAX");
+#endif
+
 static pthread_mutex_t *mutex_assign(void)
 {
 	return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
 }
 
-static struct cds_lfht *lfht_new(void)
+static struct cds_lfht *lfht_new(size_t size)
 {
-	return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
+	return cds_lfht_new(size, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
 }
 
 static void reset_mutexes(void)
@@ -118,32 +128,15 @@ static void *my_mempcpy(void *dest, const void *src, size_t n)
 #define RETURN_ADDRESS(nr) \
   (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
 
-#define UINT2STR_MAX (sizeof(unsigned) == 4 ? 10 : 19)
-static char *uint2str(unsigned num, char *dst, size_t *size)
-{
-	if (num <= 9) {
-		*size -= 1;
-		*dst++ = (char)(num + '0');
-		return dst;
-	} else {
-		char buf[UINT2STR_MAX];
-		char *end = buf + sizeof(buf);
-		char *p = end;
-		size_t adj;
-
-		do {
-			*size -= 1;
-			*--p = (char)((num % 10) + '0');
-			num /= 10;
-		} while (num && *size);
-
-		if (!num) {
-			adj = end - p;
-			return mempcpy(dst, p, adj);
-		}
-	}
-	return NULL;
-}
+/*
+ * only for interpreted sources (Perl/Ruby/etc), not backtrace_symbols files
+ */
+struct src_file {
+	struct cds_lfht_node nd; /* <=> files table */
+	uint32_t fn_hash;
+	uint32_t fn_len; /* < PATH_MAX */
+	char fn[]; /* NUL-terminated */
+};
 
 /* allocated via real_malloc, immortal for safety reasons */
 struct src_loc {
@@ -154,11 +147,15 @@ struct src_loc {
 	size_t frees;
 	size_t age_total; /* (age_total / frees) => mean age at free */
 	size_t max_lifespan;
-	struct cds_lfht_node hnode;
+	struct cds_lfht_node hnode; /* <=> totals table */
 	struct cds_list_head allocs; /* <=> alloc_hdr.node */
-	uint32_t hval;
-	uint32_t capa;
-	char k[];
+	uint32_t loc_hash;
+	uint32_t bt_len;
+	/* next 3 fields contiguous for hash_src_loc(): */
+	struct src_file *f;
+	/* .lineno only needs `unsigned', but is size_t for alignment */
+	size_t lineno;
+	uintptr_t bt[];
 };
 
 /*
@@ -178,11 +175,10 @@ struct alloc_hdr {
 	size_t size;
 };
 
-/* $PATHNAME:$LINENO */
 static MWRAP_TSD union {
-	char kbuf[
-		sizeof(struct src_loc) + PATH_MAX + sizeof(":") + UINT2STR_MAX
-	];
+	char kbuf[sizeof(struct src_file) + PATH_MAX];
+	char btbuf[sizeof(struct src_loc) + sizeof(uintptr_t) * MWRAP_BT_MAX];
+	struct src_file src_file;
 	struct src_loc src_loc;
 } tsd;
 
@@ -198,12 +194,12 @@ static void *hdr2ptr(struct alloc_hdr *h)
 
 static int loc_is_addr(const struct src_loc *l)
 {
-	return l->capa == 0;
+	return l->f == NULL;
 }
 
-static size_t loc_size(const struct src_loc *l)
+static size_t bt_bytelen(const struct src_loc *l)
 {
-	return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa;
+	return sizeof(l->bt[0]) * l->bt_len;
 }
 
 static int loc_eq(struct cds_lfht_node *node, const void *key)
@@ -213,33 +209,53 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
 
 	existing = caa_container_of(node, struct src_loc, hnode);
 
-	return (k->hval == existing->hval &&
-		k->capa == existing->capa &&
-		memcmp(k->k, existing->k, loc_size(k)) == 0);
+	return (k->f == existing->f &&
+		k->lineno == existing->lineno &&
+		k->bt_len == existing->bt_len &&
+		!memcmp(k->bt, existing->bt, bt_bytelen(k)));
+}
+
+static int fn_eq(struct cds_lfht_node *node, const void *key)
+{
+	const struct src_file *existing;
+	const struct src_file *k = key;
+
+	existing = caa_container_of(node, struct src_file, nd);
+
+	return (k->fn_len == existing->fn_len &&
+		!memcmp(k->fn, existing->fn, k->fn_len));
 }
 
-static struct src_loc *totals_add_rcu(struct src_loc *k)
+static struct src_loc *src_loc_get(struct cds_lfht *t, const struct src_loc *k)
 {
 	struct cds_lfht_iter iter;
 	struct cds_lfht_node *cur;
-	struct src_loc *l = 0;
-	struct cds_lfht *t;
 
-again:
-	t = CMM_LOAD_SHARED(totals);
-	if (!t) return l;
 	mwrap_assert(rcu_read_ongoing());
-	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+	cds_lfht_lookup(t, k->loc_hash, loc_eq, k, &iter);
 	cur = cds_lfht_iter_get_node(&iter);
-	if (cur) {
-		l = caa_container_of(cur, struct src_loc, hnode);
+	return cur ? caa_container_of(cur, struct src_loc, hnode) : NULL;
+}
+
+static struct src_loc *totals_add_rcu(const struct src_loc *k)
+{
+	struct src_loc *l;
+	struct cds_lfht *t = CMM_LOAD_SHARED(totals);
+	if (!t) return NULL;
+
+again:
+	l = src_loc_get(t, k);
+	if (l) {
 		uatomic_add(&l->total, k->total);
 		uatomic_add(&l->allocations, 1);
 	} else {
-		size_t n = loc_size(k);
-		l = real_malloc(sizeof(*l) + n);
+		size_t n = bt_bytelen(k) + sizeof(*k);
+		struct cds_lfht_node *cur;
+
+		l = real_malloc(n);
 		if (!l) return l;
-		memcpy(l, k, sizeof(*l) + n);
+		memcpy(l, k, n);
+		l->freed_bytes = 0;
 		l->mtx = mutex_assign();
 		l->age_total = 0;
 		l->max_lifespan = 0;
@@ -247,7 +263,7 @@ again:
 		l->frees = 0;
 		l->allocations = 1;
 		CDS_INIT_LIST_HEAD(&l->allocs);
-		cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
+		cur = cds_lfht_add_unique(t, l->loc_hash, loc_eq, l, &l->hnode);
 		if (cur != &l->hnode) { /* lost race */
 			rcu_read_unlock();
 			real_free(l);
@@ -277,48 +293,84 @@ static const COP *mwp_curcop(void)
 	return NULL;
 }
 
-static void hash_loc(struct src_loc *k, size_t len)
+static uint32_t do_hash(const void *p, size_t len)
 {
 #if defined(XXH3_64bits)
 	union {
 		XXH64_hash_t u64;
 		uint32_t u32[2];
 	} u;
-	u.u64 = XXH3_64bits(k->k, len);
-	k->hval = u.u32[1];
+	u.u64 = XXH3_64bits(p, len);
+	return u.u32[1];
 #else
-	k->hval = jhash(k->k, len, 0xdeadbeef);
+	return jhash(p, len, 0xdeadbeef);
 #endif
 }
 
-static struct src_loc *assign_line(size_t size, const char *file, unsigned line)
+static void hash_src_loc(struct src_loc *l)
+{
+	l->loc_hash = do_hash(&l->f, sizeof(l->f) + sizeof(l->lineno) +
+				+ bt_bytelen(l));
+}
+
+static struct src_file *src_file_get(struct cds_lfht *t, struct src_file *k,
+					const char *fn, size_t fn_len)
+{
+	struct cds_lfht_iter iter;
+	struct cds_lfht_node *cur;
+
+	mwrap_assert(t); /* caller should've bailed if missing */
+	if (fn_len >= PATH_MAX)
+		return NULL;
+	k->fn_len = (uint32_t)fn_len;
+	memcpy(k->fn, fn, fn_len);
+	k->fn[fn_len] = 0;
+	k->fn_hash = do_hash(k->fn, fn_len);
+	mwrap_assert(rcu_read_ongoing());
+	cds_lfht_lookup(t, k->fn_hash, fn_eq, k, &iter);
+	cur = cds_lfht_iter_get_node(&iter);
+
+	return cur ? caa_container_of(cur, struct src_file, nd) : NULL;
+}
+
+static struct src_loc *assign_line(size_t size, const char *fn, unsigned lineno)
 {
 	/* avoid vsnprintf or anything which could call malloc here: */
 	size_t len;
-	struct src_loc *k;
-	char *dst;
-	size_t uint_size = UINT2STR_MAX;
+	struct src_file *f;
+	struct src_file *k = &tsd.src_file;
+	struct src_loc *l;
+	struct cds_lfht_node *cur;
+	struct cds_lfht *t = CMM_LOAD_SHARED(files);
+
+	mwrap_assert(t);
 
-	if (!file)
+	if (!fn)
 		return NULL;
-	len = strlen(file);
-	if (len > PATH_MAX)
-		len = PATH_MAX;
-	k = &tsd.src_loc;
-	k->total = size;
-	dst = mempcpy(k->k, file, len);
-	*dst++ = ':';
-
-	if (line == UINT_MAX) /* no line number */
-		*dst++ = '-';
-	else
-		dst = uint2str(line, dst, &uint_size);
-
-	mwrap_assert(dst && "bad math");
-	*dst = 0;	/* terminate string */
-	k->capa = (uint32_t)(dst - k->k + 1);
-	hash_loc(k, k->capa);
-	return totals_add_rcu(k);
+	len = strlen(fn);
+	if (len >= PATH_MAX)
+		len = PATH_MAX - 1;
+again:
+	f = src_file_get(t, k, fn, len);
+	if (!f) { /* doesn't exist, add a new one */
+		f = real_malloc(sizeof(*f) + len + 1);
+		if (!f) return NULL;
+		memcpy(f, k, sizeof(*f) + len + 1);
+		cur = cds_lfht_add_unique(t, f->fn_hash, fn_eq, f, &f->nd);
+		if (cur != &f->nd) { /* lost race */
+			rcu_read_unlock();
+			real_free(f);
+			rcu_read_lock();
+			goto again;
+		}
+	}
+	l = &tsd.src_loc;
+	l->total = size;
+	l->f = f;
+	l->lineno = lineno;
+	l->bt_len = 0;
+	hash_src_loc(l);
+	return totals_add_rcu(l);
 }
 
 static struct src_loc *
@@ -338,12 +390,14 @@ update_stats_rcu_lock(size_t *generation, size_t size, uintptr_t caller)
 	if (cop)
 		ret = assign_line(size, OutCopFILE(cop), CopLINE(cop));
 #endif /* MWRAP_PERL */
-	if (!ret) {
+	if (!ret) { /* no associated Perl code, just C/C++ */
 		k = &tsd.src_loc;
 		k->total = size;
-		memcpy(k->k, &caller, sizeof(caller));
-		k->capa = 0;
-		hash_loc(k, sizeof(caller));
+		k->f = NULL;
+		k->lineno = 0;
+		k->bt[0] = caller;
+		k->bt_len = 1;
+		hash_src_loc(k);
 		ret = totals_add_rcu(k);
 	}
 out:
@@ -633,22 +687,25 @@ static void *dump_to_file(struct dump_arg *a)
 	t = CMM_LOAD_SHARED(totals);
 	if (!t)
 		goto out_unlock;
+
 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
-		const void *p = l->k;
 		if (l->total <= a->min) continue;
 
 		if (loc_is_addr(l)) {
-			char **s = backtrace_symbols(p, 1);
+			char **s = backtrace_symbols((void **)l->bt, 1);
+
 			if (s) {
-				p = s[0];
+				fprintf(a->fp, "%16zu %12zu %s\n",
+					l->total, l->allocations, s[0]);
 				free(s);
 			} else {
 				fprintf(stderr, "backtrace_symbols: %s\n",
 					strerror(errno));
 			}
+		} else {
+			fprintf(a->fp, "%16zu %12zu %s:%zu\n",
+				l->total, l->allocations, l->f->fn, l->lineno);
 		}
-		fprintf(a->fp, "%16zu %12zu %s\n",
-			l->total, l->allocations, (const char *)p);
 	}
 out_unlock:
 	rcu_read_unlock();
@@ -666,6 +723,50 @@ static int extract_addr(const char *str, size_t len, void **p)
 #endif
 }
 
+/* str is $PATHNAME:$LINENO, len is strlen(str) */
+static struct src_loc *src_loc_lookup(const char *str, size_t len)
+{
+	char *c = memrchr(str, ':', len);
+	const char *end = str + len;
+	unsigned lineno;
+	size_t fn_len;
+	struct src_file *f;
+	struct src_loc *l = NULL;
+	struct cds_lfht *t = CMM_LOAD_SHARED(files);
+
+	mwrap_assert(str[len] == 0);
+	if (!c || c == end || !t)
+		return NULL;
+
+	fn_len = c - str;
+	c++;
+	if (*c == '-') {
+		lineno = UINT_MAX;
+	} else {
+		lineno = 0;
+		for (; *c; c++) {
+			if (*c < '0' || *c > '9')
+				return NULL;
+			lineno *= 10;
+			lineno += (*c - '0');
+		}
+	}
+	rcu_read_lock();
+	f = src_file_get(t, &tsd.src_file, str, fn_len);
+	t = CMM_LOAD_SHARED(totals);
+	if (f && t) {
+		struct src_loc *k = &tsd.src_loc;
+
+		k->f = f;
+		k->lineno = lineno;
+		k->bt_len = 0;
+		hash_src_loc(k);
+		l = src_loc_get(t, k);
+	}
+	rcu_read_unlock();
+	return l;
+}
+
 #ifndef O_CLOEXEC
 #  define O_CLOEXEC 0
 #endif
@@ -754,7 +855,10 @@ __attribute__((constructor)) static void mwrap_ctor(void)
 	if (!STATIC_MTX_INIT_OK)
 		reset_mutexes();
 	/* initialize mutexes used by urcu-bp */
-	CMM_STORE_SHARED(totals, lfht_new());
+	CMM_STORE_SHARED(files, lfht_new(256));
+	if (!CMM_LOAD_SHARED(files))
+		fprintf(stderr, "failed to allocate files table\n");
+	CMM_STORE_SHARED(totals, lfht_new(16384));
 	if (!CMM_LOAD_SHARED(totals))
 		fprintf(stderr, "failed to allocate totals table\n");
 	h = real_malloc(sizeof(struct alloc_hdr));

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-12-09 19:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-02 11:17 [RFC] pool filenames into separate table Eric Wong
2022-12-09 19:32 ` [PATCH v2] " Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mwrap-perl.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).