mwrap (Perl version) user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed
* [PATCH 0/7] shrinkage...
@ 2022-12-19 11:19 Eric Wong
  2022-12-19 11:19 ` [PATCH 1/7] core: shrink src_loc 8 bytes on x86-64 Eric Wong
                   ` (6 more replies)
  0 siblings, 7 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
  To: mwrap-perl

"struct src_loc" gets a small diet, and there's a bunch of
code simplification and golfing as well.

Eric Wong (7):
  core: shrink src_loc 8 bytes on x86-64
  core: shrink src_loc by another 8 bytes on x86-64
  core: drop FreeBSD-specific mutex initialization
  calloc: consolidate ENOMEM handling
  *alloc: limit scope of RCU lock
  core: simplify callers of alloc_insert_rcu
  README: add some caveats (signalfd usage w/ current URCU)

 Mwrap.xs     |   4 +-
 README       |   5 ++
 httpd.h      |   6 +-
 mwrap_core.h | 208 +++++++++++++++++++++++----------------------------
 4 files changed, 103 insertions(+), 120 deletions(-)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/7] core: shrink src_loc 8 bytes on x86-64
  2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
  2022-12-19 11:19 ` [PATCH 2/7] core: shrink src_loc by another " Eric Wong
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
  To: mwrap-perl

We can rely on the stable value of ->loc_hash to assign the
mutex at when it requires locking rather than relying on a
monotonically increasing counter.
---
 mwrap_core.h | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/mwrap_core.h b/mwrap_core.h
index 7681ca5..7be0a7a 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -93,7 +93,6 @@ union padded_mutex {
 #else /* only tested on Linux + glibc */
 #  define STATIC_MTX_INIT_OK (1)
 #endif
-static size_t mutex_i;
 static union padded_mutex mutexes[MUTEX_NR] = {
 #if STATIC_MTX_INIT_OK
 	[0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
@@ -105,11 +104,6 @@ static union padded_mutex mutexes[MUTEX_NR] = {
 static_assert(UINT32_MAX > PATH_MAX, "UINT32_MAX > PATH_MAX");
 #endif
 
-static pthread_mutex_t *mutex_assign(void)
-{
-	return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
-}
-
 static struct cds_lfht *lfht_new(size_t size)
 {
 	return cds_lfht_new(size, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
@@ -163,7 +157,6 @@ struct src_file {
 
 /* allocated via real_malloc, immortal for safety reasons */
 struct src_loc {
-	pthread_mutex_t *mtx;
 	size_t total;
 	size_t freed_bytes;
 	size_t allocations;
@@ -309,7 +302,6 @@ again:
 		if (!l) return l;
 		memcpy(l, k, n);
 		l->freed_bytes = 0;
-		l->mtx = mutex_assign();
 		l->age_total = 0;
 		l->max_lifespan = 0;
 		l->freed_bytes = 0;
@@ -470,6 +462,13 @@ static void free_hdr_rcu(struct rcu_head *dead)
 	real_free(h->real);
 }
 
+static pthread_mutex_t *src_loc_mutex_lock(const struct src_loc *l)
+{
+	pthread_mutex_t *mtx = &mutexes[l->loc_hash & MUTEX_MASK].mtx;
+	CHECK(int, 0, pthread_mutex_lock(mtx));
+	return mtx;
+}
+
 void free(void *p)
 {
 	if (p) {
@@ -485,11 +484,11 @@ void free(void *p)
 			uatomic_inc(&l->frees);
 			uatomic_add(&l->age_total, age);
 
-			CHECK(int, 0, pthread_mutex_lock(l->mtx));
+			pthread_mutex_t *mtx = src_loc_mutex_lock(l);
 			cds_list_del_rcu(&h->anode);
 			if (age > l->max_lifespan)
 				l->max_lifespan = age;
-			CHECK(int, 0, pthread_mutex_unlock(l->mtx));
+			CHECK(int, 0, pthread_mutex_unlock(mtx));
 
 			call_rcu(&h->as.dead, free_hdr_rcu);
 		} else {
@@ -509,9 +508,9 @@ alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size,
 	h->as.live.loc = l;
 	h->as.live.gen = generation;
 	if (l) {
-		CHECK(int, 0, pthread_mutex_lock(l->mtx));
+		pthread_mutex_t *mtx = src_loc_mutex_lock(l);
 		cds_list_add_rcu(&h->anode, &l->allocs);
-		CHECK(int, 0, pthread_mutex_unlock(l->mtx));
+		CHECK(int, 0, pthread_mutex_unlock(mtx));
 	}
 }
 

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/7] core: shrink src_loc by another 8 bytes on x86-64
  2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
  2022-12-19 11:19 ` [PATCH 1/7] core: shrink src_loc 8 bytes on x86-64 Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
  2022-12-19 11:19 ` [PATCH 3/7] core: drop FreeBSD-specific mutex initialization Eric Wong
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
  To: mwrap-perl

lineno shouldn't need more than 24-bits (or >= 16.7 million).
Our bt_len is capped at 32, and nobody should ever need to care
about tracking allocations if call stack depths exceed 256.
---
 Mwrap.xs     |  4 ++--
 httpd.h      |  6 ++---
 mwrap_core.h | 62 +++++++++++++++++++++++++++++++++-------------------
 3 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/Mwrap.xs b/Mwrap.xs
index 846d89d..846d0cb 100644
--- a/Mwrap.xs
+++ b/Mwrap.xs
@@ -19,10 +19,10 @@ static SV *location_string(struct src_loc *l)
 
 	if (l->f) {
 		sv_catpv(ret, l->f->fn);
-		if (l->lineno == UINT_MAX)
+		if (l->lineno == U24_MAX)
 			sv_catpvs(ret, ":-");
 		else
-			sv_catpvf(ret, ":%zu", l->lineno);
+			sv_catpvf(ret, ":%u", l->lineno);
 	}
 	if (l->bt_len) {
 		AUTO_FREE char **s = bt_syms(l->bt, l->bt_len);
diff --git a/httpd.h b/httpd.h
index 8e58286..eddea97 100644
--- a/httpd.h
+++ b/httpd.h
@@ -473,10 +473,10 @@ static off_t write_loc_name(FILE *fp, const struct src_loc *l)
 	}
 	if (l->f) {
 		fputs(l->f->fn, fp);
-		if (l->lineno == UINT_MAX)
+		if (l->lineno == U24_MAX)
 			FPUTS(":-", fp);
 		else
-			fprintf(fp, ":%zu", l->lineno);
+			fprintf(fp, ":%u", l->lineno);
 	}
 	if (l->bt_len) {
 		AUTO_FREE char **s = bt_syms(l->bt, l->bt_len);
@@ -703,7 +703,7 @@ static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
 				hsl->live, hsl->mean_life, hsl->max_life);
 			FPUTS("<td><a\nhref=\"../at/", fp);
 
-			write_b64_url(fp, (const void *)&hsl->sl->f,
+			write_b64_url(fp, src_loc_hash_tip(hsl->sl),
 					src_loc_hash_len(hsl->sl));
 
 			FPUTS("\">", fp);
diff --git a/mwrap_core.h b/mwrap_core.h
index 7be0a7a..7f52f39 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -61,6 +61,8 @@ typedef void COP;
 #	include "jhash.h"
 #endif
 
+#define U24_MAX (1U << 24)
+
 /*
  * Perl doesn't have a GC the same way (C) Ruby does, so no GC count.
  * Instead, the relative age of an object is the number of total bytes
@@ -166,18 +168,15 @@ struct src_loc {
 	struct cds_lfht_node hnode; /* <=> totals table */
 	struct cds_list_head allocs; /* <=> alloc_hdr.node */
 	uint32_t loc_hash;
-	uint32_t bt_len;
+	uint8_t bt_len;
 	/* next 3 fields contiguous for hash_src_loc(): */
+	unsigned lineno:24; /* nobody should have >=16.7 LoC in one file */
 	struct src_file *f;
-	/* .lineno only needs `unsigned', but is size_t for alignment */
-	size_t lineno;
 	void *bt[];
-};
+} __attribute__((packed,aligned(8)));
 
-#ifdef static_assert
-static_assert(sizeof(struct src_file *) == sizeof(size_t),
-		"size_t is the same size as a pointer");
-#endif
+/* sizeof() doesn't work on bitfields */
+#define SIZEOF_LINENO (size_t)(24 / 8)
 
 /*
  * Every allocation has this in the header, maintain alignment with malloc
@@ -216,13 +215,13 @@ union stk_bt {
  * thing.
  */
 #ifdef static_assert
-static_assert(offsetof(struct src_loc, lineno) + sizeof(void *) ==
+static_assert(offsetof(struct src_loc, f) + sizeof(void *) ==
 		offsetof(struct src_loc, bt),
 		"bt lineno is is bt[-1]");
 #endif
 static void **bt_dst(union stk_bt *bt)
 {
-	return (void **)&bt->sl.lineno;
+	return (void **)&bt->sl.f;
 }
 
 static struct alloc_hdr *ptr2hdr(void *p)
@@ -247,7 +246,12 @@ static size_t bt_bytelen(const struct src_loc *l)
 
 static size_t src_loc_hash_len(const struct src_loc *l)
 {
-	return sizeof(l->f) + sizeof(l->lineno) + + bt_bytelen(l);
+	return sizeof(l->f) + SIZEOF_LINENO + bt_bytelen(l);
+}
+
+static void *src_loc_hash_tip(const struct src_loc *l)
+{
+	return (void *)((uintptr_t)&l->bt_len + sizeof(l->bt_len));
 }
 
 static int loc_eq(struct cds_lfht_node *node, const void *key)
@@ -258,7 +262,8 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
 	existing = caa_container_of(node, struct src_loc, hnode);
 
 	return (k->bt_len == existing->bt_len &&
-		!memcmp(&k->f, &existing->f, src_loc_hash_len(k)));
+		!memcmp(src_loc_hash_tip(k), src_loc_hash_tip(existing),
+			src_loc_hash_len(k)));
 }
 
 static int fn_eq(struct cds_lfht_node *node, const void *key)
@@ -356,7 +361,7 @@ static uint32_t do_hash(const void *p, size_t len)
 
 static void hash_src_loc(struct src_loc *l)
 {
-	l->loc_hash = do_hash(&l->f, src_loc_hash_len(l));
+	l->loc_hash = do_hash(src_loc_hash_tip(l), src_loc_hash_len(l));
 }
 
 static struct src_file *src_file_get(struct cds_lfht *t, struct src_file *k,
@@ -401,6 +406,15 @@ static struct src_loc *assign_line(size_t size, const COP *cop,
 	size_t len = strlen(fn);
 	if (len >= PATH_MAX)
 		len = PATH_MAX - 1;
+
+	if (lineno == UINT_MAX) { /* NOLINE in Perl is UINT_MAX */
+		lineno = U24_MAX;
+	} else if (lineno > U24_MAX) {
+		fprintf(stderr,
+			"%s:%u line number exceeds limit (%u), capped\n",
+			fn, lineno, U24_MAX);
+		lineno = U24_MAX;
+	}
 again:
 	f = src_file_get(t, &sf.sf, fn, len);
 	if (!f) { /* doesn't exist, add a new one */
@@ -417,6 +431,7 @@ again:
 			goto again;
 		}
 	}
+
 	sl->total = size;
 	sl->f = f;
 	sl->lineno = lineno;
@@ -767,7 +782,7 @@ static void *dump_to_file(struct dump_arg *a)
 				fprintf(a->fp, "%16zu %12zu %s\n",
 					l->total, l->allocations, s[0]);
 		} else {
-			fprintf(a->fp, "%16zu %12zu %s:%zu\n",
+			fprintf(a->fp, "%16zu %12zu %s:%u\n",
 				l->total, l->allocations, l->f->fn, l->lineno);
 		}
 	}
@@ -812,7 +827,7 @@ static struct src_loc *src_loc_lookup(const char *str, size_t len)
 	size_t fn_len = c - str;
 	c++;
 	if (*c == '-') {
-		lineno = UINT_MAX;
+		lineno = U24_MAX;
 	} else {
 		lineno = 0;
 		for (; c < end; c++) {
@@ -821,6 +836,8 @@ static struct src_loc *src_loc_lookup(const char *str, size_t len)
 			lineno *= 10;
 			lineno += (*c - '0');
 		}
+		if (lineno > U24_MAX)
+			return NULL;
 	}
 	rcu_read_lock();
 	struct src_file *f = src_file_get(t, &sf.sf, str, fn_len);
@@ -953,19 +970,20 @@ static struct src_loc *mwrap_get(const char *str, size_t len)
 
 static struct src_loc *mwrap_get_bin(const char *buf, size_t len)
 {
-	if ((len % sizeof(void *)) == 0 && len >= (2 * sizeof(void *))) {
-		union stk_bt k;
+	static const size_t min_len = sizeof(struct src_file *) + SIZEOF_LINENO;
+
+	if (len >= min_len && ((len - min_len) % sizeof(void *)) == 0) {
 		struct cds_lfht *t = CMM_LOAD_SHARED(totals);
 		if (!t) return NULL;
 
-		k.sl.bt_len = len / sizeof(void *);
-		k.sl.bt_len -= 2; /* lineno + src_file *f */
+		union stk_bt k;
+		size_t bt_len = (len - min_len) / sizeof(void *);
 
-		if (k.sl.bt_len > MWRAP_BT_MAX)
+		if (bt_len > MWRAP_BT_MAX)
 			return NULL;
+		k.sl.bt_len = bt_len;
 
-		memcpy(&k.sl.f, buf, len);
-
+		memcpy(src_loc_hash_tip(&k.sl), buf, len);
 		hash_src_loc(&k.sl);
 		rcu_read_lock();
 		struct src_loc *l = src_loc_get(t, &k.sl);

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/7] core: drop FreeBSD-specific mutex initialization
  2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
  2022-12-19 11:19 ` [PATCH 1/7] core: shrink src_loc 8 bytes on x86-64 Eric Wong
  2022-12-19 11:19 ` [PATCH 2/7] core: shrink src_loc by another " Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
  2022-12-19 11:19 ` [PATCH 4/7] calloc: consolidate ENOMEM handling Eric Wong
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
  To: mwrap-perl

From: Eric Wong <mwrap-perl@80x24.org>

It's no longer needed with our embedded malloc and our
non-glibc ensure_initialization() mutex avoidance hack.
---
 mwrap_core.h | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/mwrap_core.h b/mwrap_core.h
index 7f52f39..b382018 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -87,18 +87,11 @@ union padded_mutex {
 	char pad[64]; /* cache alignment for common CPUs */
 };
 
-/* a round-robin pool of mutexes */
+/* a pool of mutexes for all "struct src_loc" */
 #define MUTEX_NR   (1 << 6)
 #define MUTEX_MASK (MUTEX_NR - 1)
-#ifdef __FreeBSD__
-#  define STATIC_MTX_INIT_OK (0)
-#else /* only tested on Linux + glibc */
-#  define STATIC_MTX_INIT_OK (1)
-#endif
 static union padded_mutex mutexes[MUTEX_NR] = {
-#if STATIC_MTX_INIT_OK
 	[0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
-#endif
 };
 
 #ifdef static_assert
@@ -1010,12 +1003,6 @@ __attribute__((constructor)) static void mwrap_ctor(void)
 	ensure_initialization();
 	CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
 
-	/*
-	 * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
-	 * which happens at pthread_mutex_lock, and that calls calloc
-	 */
-	if (!STATIC_MTX_INIT_OK)
-		reset_mutexes();
 	/* initialize mutexes used by urcu-bp */
 	CMM_STORE_SHARED(files, lfht_new(256));
 	if (!CMM_LOAD_SHARED(files))

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/7] calloc: consolidate ENOMEM handling
  2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
                   ` (2 preceding siblings ...)
  2022-12-19 11:19 ` [PATCH 3/7] core: drop FreeBSD-specific mutex initialization Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
  2022-12-19 11:19 ` [PATCH 5/7] *alloc: limit scope of RCU lock Eric Wong
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
  To: mwrap-perl

From: Eric Wong <mwrap-perl@80x24.org>

We can simplify the error paths in calloc and call memset() outside
of the RCU critical section.
---
 mwrap_core.h | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/mwrap_core.h b/mwrap_core.h
index b382018..14e5d79 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -662,8 +662,7 @@ void *malloc(size_t size)
 		p = hdr2ptr(h);
 	}
 	update_stats_rcu_unlock(l);
-	if (caa_unlikely(!p)) errno = ENOMEM;
-	return p;
+	if (p) return p;
 enomem:
 	errno = ENOMEM;
 	return 0;
@@ -674,14 +673,10 @@ void *calloc(size_t nmemb, size_t size)
 	size_t asize;
 	size_t generation = 0;
 
-	if (__builtin_mul_overflow(size, nmemb, &size)) {
-		errno = ENOMEM;
-		return 0;
-	}
-	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
-		errno = ENOMEM;
-		return 0;
-	}
+	if (__builtin_mul_overflow(size, nmemb, &size))
+		goto enomem;
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
+		goto enomem;
 	struct alloc_hdr *h;
 	SRC_LOC_BT(bt);
 	struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
@@ -689,11 +684,12 @@ void *calloc(size_t nmemb, size_t size)
 	if (p) {
 		alloc_insert_rcu(l, h, size, h, generation);
 		p = hdr2ptr(h);
-		memset(p, 0, size);
 	}
 	update_stats_rcu_unlock(l);
-	if (caa_unlikely(!p)) errno = ENOMEM;
-	return p;
+	if (p) return memset(p, 0, size);
+enomem:
+	errno = ENOMEM;
+	return 0;
 }
 
 void *realloc(void *ptr, size_t size)

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 5/7] *alloc: limit scope of RCU lock
  2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
                   ` (3 preceding siblings ...)
  2022-12-19 11:19 ` [PATCH 4/7] calloc: consolidate ENOMEM handling Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
  2022-12-19 11:19 ` [PATCH 6/7] core: simplify callers of alloc_insert_rcu Eric Wong
  2022-12-19 11:19 ` [PATCH 7/7] README: add some caveats (signalfd usage w/ current URCU) Eric Wong
  6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
  To: mwrap-perl

From: Eric Wong <mwrap-perl@80x24.org>

We can delay taking the RCU lock until the real_malloc() call
succeeds.  This gives us accurate stats in case of ENOMEM
failures and reduces our LoC a bit, too.
---
 mwrap_core.h | 62 ++++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/mwrap_core.h b/mwrap_core.h
index 14e5d79..3161243 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -509,8 +509,6 @@ static void
 alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size,
 		void *real, size_t generation)
 {
-	/* we need src_loc to remain alive for the duration of this call */
-	if (!h) return;
 	h->size = size;
 	h->real = real;
 	h->as.live.loc = l;
@@ -540,11 +538,8 @@ static bool is_power_of_two(size_t n)
 static int
 mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl)
 {
-	struct src_loc *l;
-	struct alloc_hdr *h;
 	void *real;
 	size_t asize;
-	size_t generation = 0;
 	size_t d = alignment / sizeof(void*);
 	size_t r = alignment % sizeof(void*);
 
@@ -563,18 +558,18 @@ mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl)
 	    __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
 		return ENOMEM;
 
-	l = update_stats_rcu_lock(&generation, size, sl);
-
 	real = real_malloc(asize);
 	if (real) {
 		void *p = hdr2ptr(real);
 		if (!ptr_is_aligned(p, alignment))
 			p = ptr_align(p, alignment);
-		h = ptr2hdr(p);
-		alloc_insert_rcu(l, h, size, real, generation);
+		struct alloc_hdr *h = ptr2hdr(p);
+		size_t gen = 0;
+		struct src_loc *l = update_stats_rcu_lock(&gen, size, sl);
+		alloc_insert_rcu(l, h, size, real, gen);
+		update_stats_rcu_unlock(l);
 		*pp = p;
 	}
-	update_stats_rcu_unlock(l);
 
 	return real ? 0 : ENOMEM;
 }
@@ -652,16 +647,16 @@ void *malloc(size_t size)
 	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
 		goto enomem;
 
-	size_t generation = 0;
-	SRC_LOC_BT(bt);
-	struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
-	struct alloc_hdr *h;
-	void *p = h = real_malloc(asize);
-	if (h) {
-		alloc_insert_rcu(l, h, size, h, generation);
+	void *p = real_malloc(asize);
+	if (p) {
+		size_t gen = 0;
+		SRC_LOC_BT(bt);
+		struct alloc_hdr *h = p;
+		struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
+		alloc_insert_rcu(l, h, size, h, gen);
+		update_stats_rcu_unlock(l);
 		p = hdr2ptr(h);
 	}
-	update_stats_rcu_unlock(l);
 	if (p) return p;
 enomem:
 	errno = ENOMEM;
@@ -671,22 +666,21 @@ enomem:
 void *calloc(size_t nmemb, size_t size)
 {
 	size_t asize;
-	size_t generation = 0;
 
 	if (__builtin_mul_overflow(size, nmemb, &size))
 		goto enomem;
 	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
 		goto enomem;
-	struct alloc_hdr *h;
-	SRC_LOC_BT(bt);
-	struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
-	void *p = h = real_malloc(asize);
+	void *p = real_malloc(asize);
 	if (p) {
-		alloc_insert_rcu(l, h, size, h, generation);
-		p = hdr2ptr(h);
+		size_t gen = 0;
+		struct alloc_hdr *h = p;
+		SRC_LOC_BT(bt);
+		struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
+		alloc_insert_rcu(l, h, size, h, gen);
+		update_stats_rcu_unlock(l);
+		return memset(hdr2ptr(h), 0, size);
 	}
-	update_stats_rcu_unlock(l);
-	if (p) return memset(p, 0, size);
 enomem:
 	errno = ENOMEM;
 	return 0;
@@ -704,16 +698,16 @@ void *realloc(void *ptr, size_t size)
 		errno = ENOMEM;
 		return 0;
 	}
-	struct alloc_hdr *h;
-	size_t generation = 0;
-	SRC_LOC_BT(bt);
-	struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
-	void *p = h = real_malloc(asize);
+	void *p = real_malloc(asize);
 	if (p) {
-		alloc_insert_rcu(l, h, size, h, generation);
+		size_t gen = 0;
+		struct alloc_hdr *h = p;
+		SRC_LOC_BT(bt);
+		struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
+		alloc_insert_rcu(l, h, size, h, gen);
+		update_stats_rcu_unlock(l);
 		p = hdr2ptr(h);
 	}
-	update_stats_rcu_unlock(l);
 
 	if (ptr && p) {
 		struct alloc_hdr *old = ptr2hdr(ptr);

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 6/7] core: simplify callers of alloc_insert_rcu
  2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
                   ` (4 preceding siblings ...)
  2022-12-19 11:19 ` [PATCH 5/7] *alloc: limit scope of RCU lock Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
  2022-12-19 11:19 ` [PATCH 7/7] README: add some caveats (signalfd usage w/ current URCU) Eric Wong
  6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
  To: mwrap-perl

From: Eric Wong <mwrap-perl@80x24.org>

We can actually handle all RCU locking/unlocking inside
alloc_insert_rcu to simplify callers.
---
 mwrap_core.h | 60 +++++++++++++++++++---------------------------------
 1 file changed, 22 insertions(+), 38 deletions(-)

diff --git a/mwrap_core.h b/mwrap_core.h
index 3161243..b014f5d 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -319,11 +319,6 @@ again:
 	return l;
 }
 
-static void update_stats_rcu_unlock(const struct src_loc *l)
-{
-	if (caa_likely(l)) rcu_read_unlock();
-}
-
 static const COP *mwp_curcop(void)
 {
 #if MWRAP_PERL
@@ -506,17 +501,20 @@ void free(void *p)
 }
 
 static void
-alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size,
-		void *real, size_t generation)
+alloc_insert_rcu(struct src_loc *sl, struct alloc_hdr *h, size_t size,
+		void *real)
 {
 	h->size = size;
 	h->real = real;
+	size_t gen = 0;
+	struct src_loc *l = update_stats_rcu_lock(&gen, size, sl);
 	h->as.live.loc = l;
-	h->as.live.gen = generation;
+	h->as.live.gen = gen;
 	if (l) {
 		pthread_mutex_t *mtx = src_loc_mutex_lock(l);
 		cds_list_add_rcu(&h->anode, &l->allocs);
 		CHECK(int, 0, pthread_mutex_unlock(mtx));
+		rcu_read_unlock();
 	}
 }
 
@@ -564,10 +562,7 @@ mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl)
 		if (!ptr_is_aligned(p, alignment))
 			p = ptr_align(p, alignment);
 		struct alloc_hdr *h = ptr2hdr(p);
-		size_t gen = 0;
-		struct src_loc *l = update_stats_rcu_lock(&gen, size, sl);
-		alloc_insert_rcu(l, h, size, real, gen);
-		update_stats_rcu_unlock(l);
+		alloc_insert_rcu(sl, h, size, real);
 		*pp = p;
 	}
 
@@ -649,15 +644,11 @@ void *malloc(size_t size)
 
 	void *p = real_malloc(asize);
 	if (p) {
-		size_t gen = 0;
 		SRC_LOC_BT(bt);
 		struct alloc_hdr *h = p;
-		struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
-		alloc_insert_rcu(l, h, size, h, gen);
-		update_stats_rcu_unlock(l);
-		p = hdr2ptr(h);
+		alloc_insert_rcu(&bt.sl, h, size, h);
+		return hdr2ptr(h);
 	}
-	if (p) return p;
 enomem:
 	errno = ENOMEM;
 	return 0;
@@ -673,12 +664,9 @@ void *calloc(size_t nmemb, size_t size)
 		goto enomem;
 	void *p = real_malloc(asize);
 	if (p) {
-		size_t gen = 0;
 		struct alloc_hdr *h = p;
 		SRC_LOC_BT(bt);
-		struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
-		alloc_insert_rcu(l, h, size, h, gen);
-		update_stats_rcu_unlock(l);
+		alloc_insert_rcu(&bt.sl, h, size, h);
 		return memset(hdr2ptr(h), 0, size);
 	}
 enomem:
@@ -694,28 +682,24 @@ void *realloc(void *ptr, size_t size)
 		free(ptr);
 		return 0;
 	}
-	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
-		errno = ENOMEM;
-		return 0;
-	}
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
+		goto enomem;
 	void *p = real_malloc(asize);
 	if (p) {
-		size_t gen = 0;
 		struct alloc_hdr *h = p;
 		SRC_LOC_BT(bt);
-		struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
-		alloc_insert_rcu(l, h, size, h, gen);
-		update_stats_rcu_unlock(l);
+		alloc_insert_rcu(&bt.sl, h, size, h);
 		p = hdr2ptr(h);
+		if (ptr) {
+			struct alloc_hdr *old = ptr2hdr(ptr);
+			memcpy(p, ptr, old->size < size ? old->size : size);
+			free(ptr);
+		}
+		return p;
 	}
-
-	if (ptr && p) {
-		struct alloc_hdr *old = ptr2hdr(ptr);
-		memcpy(p, ptr, old->size < size ? old->size : size);
-		free(ptr);
-	}
-	if (caa_unlikely(!p)) errno = ENOMEM;
-	return p;
+enomem:
+	errno = ENOMEM;
+	return 0;
 }
 
 struct dump_arg {

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 7/7] README: add some caveats (signalfd usage w/ current URCU)
  2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
                   ` (5 preceding siblings ...)
  2022-12-19 11:19 ` [PATCH 6/7] core: simplify callers of alloc_insert_rcu Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
  2022-12-19 22:57   ` [WIP] attempt to workaround older URCU for signalfd Eric Wong
  6 siblings, 1 reply; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
  To: mwrap-perl

Fortunately, there aren't a lot of signalfd users right now
(except me :x).  In any case, this problem should sort itself
out in a few years when new URCU releases replace old ones.

Also, only the EXTREMELY EXPENSIVE ENTERPRISE EDITION will be
able to support Perl files with over 16.7M lines in them :P
---
 README | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/README b/README
index ba95d47..0acbe50 100644
--- a/README
+++ b/README
@@ -61,6 +61,11 @@ malloc locations.
 
 * 32-bit machines are prone to overflow (WONTFIX)
 
+* signalfd(2)-reliant code will need latest URCU with commit
+  ea3a28a3f71dd02f (Disable signals in URCU background threads, 2022-09-23)
+
+* Perl source files over 16.7 million lines long are not supported :P
+
 == Public mail archives (HTTP, Atom feeds, IMAP mailbox, NNTP group):
 
 	https://80x24.org/mwrap-perl/

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [WIP] attempt to workaround older URCU for signalfd
  2022-12-19 11:19 ` [PATCH 7/7] README: add some caveats (signalfd usage w/ current URCU) Eric Wong
@ 2022-12-19 22:57   ` Eric Wong
  0 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 22:57 UTC (permalink / raw)
  To: mwrap-perl

Eric Wong <e@80x24.org> wrote:
> Fortunately, there aren't a lot of signalfd users right now
> (except me :x).  In any case, this problem should sort itself
> out in a few years when new URCU releases replace old ones.

> +* signalfd(2)-reliant code will need latest URCU with commit
> +  ea3a28a3f71dd02f (Disable signals in URCU background threads, 2022-09-23)

Fwiw, this was my attempt to workaround current (released) URCU
for fork() users.  Maybe I missed something, but I was still
getting one thread with signals unmasked after forking.  Not
sure it's worth my time to check since I can easily backport
URCU commit ea3a28a3f71dd02f for my own use.  (URCU is fairly
small and fast to build)

diff --git a/mwrap_core.h b/mwrap_core.h
index b014f5d..6d3d474 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -963,17 +963,32 @@ static struct src_loc *mwrap_get_bin(const char *buf, size_t len)
 static const char *mwrap_env;
 #include "httpd.h"
 
+/* force call_rcu to start background thread while signals are blocked */
+static void start_call_rcu(void)
+{
+	struct alloc_hdr *h = real_malloc(sizeof(struct alloc_hdr));
+	if (h) {
+		h->real = h;
+		call_rcu(&h->as.dead, free_hdr_rcu);
+	} else
+		fprintf(stderr, "malloc: %m\n");
+	struct cds_lfht *ht = lfht_new(1);
+	if (ht)
+		cds_lfht_destroy(ht, NULL);
+	else
+		fprintf(stderr, "cds_lfht_new: %m\n");
+}
+
 __attribute__((constructor)) static void mwrap_ctor(void)
 {
-	sigset_t set, old;
-	struct alloc_hdr *h;
 	mwrap_env = getenv("MWRAP");
 
 	++locating;
 
 	/* block signals */
-	CHECK(int, 0, sigfillset(&set));
-	CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &set, &old));
+	sigset_t blk, old;
+	CHECK(int, 0, sigfillset(&blk));
+	CHECK(int, 0, pthread_sigmask(SIG_BLOCK, &blk, &old));
 	ensure_initialization();
 	CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
 
@@ -984,13 +999,7 @@ __attribute__((constructor)) static void mwrap_ctor(void)
 	CMM_STORE_SHARED(totals, lfht_new(16384));
 	if (!CMM_LOAD_SHARED(totals))
 		fprintf(stderr, "failed to allocate totals table\n");
-	h = real_malloc(sizeof(struct alloc_hdr));
-	if (h) { /* force call_rcu to start background thread */
-		h->real = h;
-		call_rcu(&h->as.dead, free_hdr_rcu);
-	} else
-		fprintf(stderr, "malloc: %m\n");
-
+	start_call_rcu();
 	h1d_start();
 	CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL));
 	CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent,
diff --git a/mymalloc.h b/mymalloc.h
index 518cce3..e611485 100644
--- a/mymalloc.h
+++ b/mymalloc.h
@@ -41,6 +41,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <errno.h>
+#include <signal.h>
 
 /* this is fine on most x86-64, especially with file-backed mmap(2) */
 #define DEFAULT_GRANULARITY (64U * 1024U * 1024U)
@@ -126,6 +127,7 @@ ATTR_COLD static void mstate_tsd_dtor(void *p)
 	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
 }
 
+static void start_call_rcu(void);
 /* see httpd.h */
 static void h1d_atfork_prepare(void);
 static void h1d_atfork_parent(void);
@@ -141,9 +143,14 @@ ATTR_COLD static void atfork_prepare(void)
 ATTR_COLD static void atfork_parent(void)
 {
 	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+	sigset_t blk, old;
+	CHECK(int, 0, sigfillset(&blk));
+	CHECK(int, 0, pthread_sigmask(SIG_BLOCK, &blk, &old));
 	call_rcu_after_fork_parent();
 	CHECK(int, 0, pthread_mutex_lock(&global_mtx));
 	h1d_atfork_parent();
+	start_call_rcu();
+	CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL));
 	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
 }
 
@@ -166,8 +173,13 @@ ATTR_COLD static void atfork_child(void)
 		cds_list_add(&ms_tsd->arena_node, &arenas_active);
 	}
 	reset_mutexes();
+	sigset_t blk, old;
+	CHECK(int, 0, sigfillset(&blk));
+	CHECK(int, 0, pthread_sigmask(SIG_BLOCK, &blk, &old));
 	call_rcu_after_fork_child();
 	h1d_start();
+	start_call_rcu();
+	CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL));
 }
 
 #if defined(__GLIBC__)

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-12-19 22:57 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
2022-12-19 11:19 ` [PATCH 1/7] core: shrink src_loc 8 bytes on x86-64 Eric Wong
2022-12-19 11:19 ` [PATCH 2/7] core: shrink src_loc by another " Eric Wong
2022-12-19 11:19 ` [PATCH 3/7] core: drop FreeBSD-specific mutex initialization Eric Wong
2022-12-19 11:19 ` [PATCH 4/7] calloc: consolidate ENOMEM handling Eric Wong
2022-12-19 11:19 ` [PATCH 5/7] *alloc: limit scope of RCU lock Eric Wong
2022-12-19 11:19 ` [PATCH 6/7] core: simplify callers of alloc_insert_rcu Eric Wong
2022-12-19 11:19 ` [PATCH 7/7] README: add some caveats (signalfd usage w/ current URCU) Eric Wong
2022-12-19 22:57   ` [WIP] attempt to workaround older URCU for signalfd Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mwrap-perl.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).