* [PATCH 0/7] shrinkage...
@ 2022-12-19 11:19 Eric Wong
2022-12-19 11:19 ` [PATCH 1/7] core: shrink src_loc 8 bytes on x86-64 Eric Wong
` (6 more replies)
0 siblings, 7 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
To: mwrap-perl
"struct src_loc" gets a small diet, and there's a bunch of
code simplification and golfing as well.
Eric Wong (7):
core: shrink src_loc 8 bytes on x86-64
core: shrink src_loc by another 8 bytes on x86-64
core: drop FreeBSD-specific mutex initialization
calloc: consolidate ENOMEM handling
*alloc: limit scope of RCU lock
core: simplify callers of alloc_insert_rcu
README: add some caveats (signalfd usage w/ current URCU)
Mwrap.xs | 4 +-
README | 5 ++
httpd.h | 6 +-
mwrap_core.h | 208 +++++++++++++++++++++++----------------------------
4 files changed, 103 insertions(+), 120 deletions(-)
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/7] core: shrink src_loc 8 bytes on x86-64
2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
2022-12-19 11:19 ` [PATCH 2/7] core: shrink src_loc by another " Eric Wong
` (5 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
To: mwrap-perl
We can rely on the stable value of ->loc_hash to assign the
mutex when it requires locking rather than relying on a
monotonically increasing counter.
---
mwrap_core.h | 23 +++++++++++------------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/mwrap_core.h b/mwrap_core.h
index 7681ca5..7be0a7a 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -93,7 +93,6 @@ union padded_mutex {
#else /* only tested on Linux + glibc */
# define STATIC_MTX_INIT_OK (1)
#endif
-static size_t mutex_i;
static union padded_mutex mutexes[MUTEX_NR] = {
#if STATIC_MTX_INIT_OK
[0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
@@ -105,11 +104,6 @@ static union padded_mutex mutexes[MUTEX_NR] = {
static_assert(UINT32_MAX > PATH_MAX, "UINT32_MAX > PATH_MAX");
#endif
-static pthread_mutex_t *mutex_assign(void)
-{
- return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
-}
-
static struct cds_lfht *lfht_new(size_t size)
{
return cds_lfht_new(size, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
@@ -163,7 +157,6 @@ struct src_file {
/* allocated via real_malloc, immortal for safety reasons */
struct src_loc {
- pthread_mutex_t *mtx;
size_t total;
size_t freed_bytes;
size_t allocations;
@@ -309,7 +302,6 @@ again:
if (!l) return l;
memcpy(l, k, n);
l->freed_bytes = 0;
- l->mtx = mutex_assign();
l->age_total = 0;
l->max_lifespan = 0;
l->freed_bytes = 0;
@@ -470,6 +462,13 @@ static void free_hdr_rcu(struct rcu_head *dead)
real_free(h->real);
}
+static pthread_mutex_t *src_loc_mutex_lock(const struct src_loc *l)
+{
+ pthread_mutex_t *mtx = &mutexes[l->loc_hash & MUTEX_MASK].mtx;
+ CHECK(int, 0, pthread_mutex_lock(mtx));
+ return mtx;
+}
+
void free(void *p)
{
if (p) {
@@ -485,11 +484,11 @@ void free(void *p)
uatomic_inc(&l->frees);
uatomic_add(&l->age_total, age);
- CHECK(int, 0, pthread_mutex_lock(l->mtx));
+ pthread_mutex_t *mtx = src_loc_mutex_lock(l);
cds_list_del_rcu(&h->anode);
if (age > l->max_lifespan)
l->max_lifespan = age;
- CHECK(int, 0, pthread_mutex_unlock(l->mtx));
+ CHECK(int, 0, pthread_mutex_unlock(mtx));
call_rcu(&h->as.dead, free_hdr_rcu);
} else {
@@ -509,9 +508,9 @@ alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size,
h->as.live.loc = l;
h->as.live.gen = generation;
if (l) {
- CHECK(int, 0, pthread_mutex_lock(l->mtx));
+ pthread_mutex_t *mtx = src_loc_mutex_lock(l);
cds_list_add_rcu(&h->anode, &l->allocs);
- CHECK(int, 0, pthread_mutex_unlock(l->mtx));
+ CHECK(int, 0, pthread_mutex_unlock(mtx));
}
}
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/7] core: shrink src_loc by another 8 bytes on x86-64
2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
2022-12-19 11:19 ` [PATCH 1/7] core: shrink src_loc 8 bytes on x86-64 Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
2022-12-19 11:19 ` [PATCH 3/7] core: drop FreeBSD-specific mutex initialization Eric Wong
` (4 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
To: mwrap-perl
lineno shouldn't need more than 24-bits (or >= 16.7 million).
Our bt_len is capped at 32, and nobody should ever need to care
about tracking allocations if call stack depths exceed 256.
---
Mwrap.xs | 4 ++--
httpd.h | 6 ++---
mwrap_core.h | 62 +++++++++++++++++++++++++++++++++-------------------
3 files changed, 45 insertions(+), 27 deletions(-)
diff --git a/Mwrap.xs b/Mwrap.xs
index 846d89d..846d0cb 100644
--- a/Mwrap.xs
+++ b/Mwrap.xs
@@ -19,10 +19,10 @@ static SV *location_string(struct src_loc *l)
if (l->f) {
sv_catpv(ret, l->f->fn);
- if (l->lineno == UINT_MAX)
+ if (l->lineno == U24_MAX)
sv_catpvs(ret, ":-");
else
- sv_catpvf(ret, ":%zu", l->lineno);
+ sv_catpvf(ret, ":%u", l->lineno);
}
if (l->bt_len) {
AUTO_FREE char **s = bt_syms(l->bt, l->bt_len);
diff --git a/httpd.h b/httpd.h
index 8e58286..eddea97 100644
--- a/httpd.h
+++ b/httpd.h
@@ -473,10 +473,10 @@ static off_t write_loc_name(FILE *fp, const struct src_loc *l)
}
if (l->f) {
fputs(l->f->fn, fp);
- if (l->lineno == UINT_MAX)
+ if (l->lineno == U24_MAX)
FPUTS(":-", fp);
else
- fprintf(fp, ":%zu", l->lineno);
+ fprintf(fp, ":%u", l->lineno);
}
if (l->bt_len) {
AUTO_FREE char **s = bt_syms(l->bt, l->bt_len);
@@ -703,7 +703,7 @@ static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
hsl->live, hsl->mean_life, hsl->max_life);
FPUTS("<td><a\nhref=\"../at/", fp);
- write_b64_url(fp, (const void *)&hsl->sl->f,
+ write_b64_url(fp, src_loc_hash_tip(hsl->sl),
src_loc_hash_len(hsl->sl));
FPUTS("\">", fp);
diff --git a/mwrap_core.h b/mwrap_core.h
index 7be0a7a..7f52f39 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -61,6 +61,8 @@ typedef void COP;
# include "jhash.h"
#endif
+#define U24_MAX (1U << 24)
+
/*
* Perl doesn't have a GC the same way (C) Ruby does, so no GC count.
* Instead, the relative age of an object is the number of total bytes
@@ -166,18 +168,15 @@ struct src_loc {
struct cds_lfht_node hnode; /* <=> totals table */
struct cds_list_head allocs; /* <=> alloc_hdr.node */
uint32_t loc_hash;
- uint32_t bt_len;
+ uint8_t bt_len;
/* next 3 fields contiguous for hash_src_loc(): */
+ unsigned lineno:24; /* nobody should have >=16.7 LoC in one file */
struct src_file *f;
- /* .lineno only needs `unsigned', but is size_t for alignment */
- size_t lineno;
void *bt[];
-};
+} __attribute__((packed,aligned(8)));
-#ifdef static_assert
-static_assert(sizeof(struct src_file *) == sizeof(size_t),
- "size_t is the same size as a pointer");
-#endif
+/* sizeof() doesn't work on bitfields */
+#define SIZEOF_LINENO (size_t)(24 / 8)
/*
* Every allocation has this in the header, maintain alignment with malloc
@@ -216,13 +215,13 @@ union stk_bt {
* thing.
*/
#ifdef static_assert
-static_assert(offsetof(struct src_loc, lineno) + sizeof(void *) ==
+static_assert(offsetof(struct src_loc, f) + sizeof(void *) ==
offsetof(struct src_loc, bt),
"bt lineno is is bt[-1]");
#endif
static void **bt_dst(union stk_bt *bt)
{
- return (void **)&bt->sl.lineno;
+ return (void **)&bt->sl.f;
}
static struct alloc_hdr *ptr2hdr(void *p)
@@ -247,7 +246,12 @@ static size_t bt_bytelen(const struct src_loc *l)
static size_t src_loc_hash_len(const struct src_loc *l)
{
- return sizeof(l->f) + sizeof(l->lineno) + + bt_bytelen(l);
+ return sizeof(l->f) + SIZEOF_LINENO + bt_bytelen(l);
+}
+
+static void *src_loc_hash_tip(const struct src_loc *l)
+{
+ return (void *)((uintptr_t)&l->bt_len + sizeof(l->bt_len));
}
static int loc_eq(struct cds_lfht_node *node, const void *key)
@@ -258,7 +262,8 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
existing = caa_container_of(node, struct src_loc, hnode);
return (k->bt_len == existing->bt_len &&
- !memcmp(&k->f, &existing->f, src_loc_hash_len(k)));
+ !memcmp(src_loc_hash_tip(k), src_loc_hash_tip(existing),
+ src_loc_hash_len(k)));
}
static int fn_eq(struct cds_lfht_node *node, const void *key)
@@ -356,7 +361,7 @@ static uint32_t do_hash(const void *p, size_t len)
static void hash_src_loc(struct src_loc *l)
{
- l->loc_hash = do_hash(&l->f, src_loc_hash_len(l));
+ l->loc_hash = do_hash(src_loc_hash_tip(l), src_loc_hash_len(l));
}
static struct src_file *src_file_get(struct cds_lfht *t, struct src_file *k,
@@ -401,6 +406,15 @@ static struct src_loc *assign_line(size_t size, const COP *cop,
size_t len = strlen(fn);
if (len >= PATH_MAX)
len = PATH_MAX - 1;
+
+ if (lineno == UINT_MAX) { /* NOLINE in Perl is UINT_MAX */
+ lineno = U24_MAX;
+ } else if (lineno > U24_MAX) {
+ fprintf(stderr,
+ "%s:%u line number exceeds limit (%u), capped\n",
+ fn, lineno, U24_MAX);
+ lineno = U24_MAX;
+ }
again:
f = src_file_get(t, &sf.sf, fn, len);
if (!f) { /* doesn't exist, add a new one */
@@ -417,6 +431,7 @@ again:
goto again;
}
}
+
sl->total = size;
sl->f = f;
sl->lineno = lineno;
@@ -767,7 +782,7 @@ static void *dump_to_file(struct dump_arg *a)
fprintf(a->fp, "%16zu %12zu %s\n",
l->total, l->allocations, s[0]);
} else {
- fprintf(a->fp, "%16zu %12zu %s:%zu\n",
+ fprintf(a->fp, "%16zu %12zu %s:%u\n",
l->total, l->allocations, l->f->fn, l->lineno);
}
}
@@ -812,7 +827,7 @@ static struct src_loc *src_loc_lookup(const char *str, size_t len)
size_t fn_len = c - str;
c++;
if (*c == '-') {
- lineno = UINT_MAX;
+ lineno = U24_MAX;
} else {
lineno = 0;
for (; c < end; c++) {
@@ -821,6 +836,8 @@ static struct src_loc *src_loc_lookup(const char *str, size_t len)
lineno *= 10;
lineno += (*c - '0');
}
+ if (lineno > U24_MAX)
+ return NULL;
}
rcu_read_lock();
struct src_file *f = src_file_get(t, &sf.sf, str, fn_len);
@@ -953,19 +970,20 @@ static struct src_loc *mwrap_get(const char *str, size_t len)
static struct src_loc *mwrap_get_bin(const char *buf, size_t len)
{
- if ((len % sizeof(void *)) == 0 && len >= (2 * sizeof(void *))) {
- union stk_bt k;
+ static const size_t min_len = sizeof(struct src_file *) + SIZEOF_LINENO;
+
+ if (len >= min_len && ((len - min_len) % sizeof(void *)) == 0) {
struct cds_lfht *t = CMM_LOAD_SHARED(totals);
if (!t) return NULL;
- k.sl.bt_len = len / sizeof(void *);
- k.sl.bt_len -= 2; /* lineno + src_file *f */
+ union stk_bt k;
+ size_t bt_len = (len - min_len) / sizeof(void *);
- if (k.sl.bt_len > MWRAP_BT_MAX)
+ if (bt_len > MWRAP_BT_MAX)
return NULL;
+ k.sl.bt_len = bt_len;
- memcpy(&k.sl.f, buf, len);
-
+ memcpy(src_loc_hash_tip(&k.sl), buf, len);
hash_src_loc(&k.sl);
rcu_read_lock();
struct src_loc *l = src_loc_get(t, &k.sl);
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/7] core: drop FreeBSD-specific mutex initialization
2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
2022-12-19 11:19 ` [PATCH 1/7] core: shrink src_loc 8 bytes on x86-64 Eric Wong
2022-12-19 11:19 ` [PATCH 2/7] core: shrink src_loc by another " Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
2022-12-19 11:19 ` [PATCH 4/7] calloc: consolidate ENOMEM handling Eric Wong
` (3 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
To: mwrap-perl
From: Eric Wong <mwrap-perl@80x24.org>
It's no longer needed with our embedded malloc and our
non-glibc ensure_initialization() mutex avoidance hack.
---
mwrap_core.h | 15 +--------------
1 file changed, 1 insertion(+), 14 deletions(-)
diff --git a/mwrap_core.h b/mwrap_core.h
index 7f52f39..b382018 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -87,18 +87,11 @@ union padded_mutex {
char pad[64]; /* cache alignment for common CPUs */
};
-/* a round-robin pool of mutexes */
+/* a pool of mutexes for all "struct src_loc" */
#define MUTEX_NR (1 << 6)
#define MUTEX_MASK (MUTEX_NR - 1)
-#ifdef __FreeBSD__
-# define STATIC_MTX_INIT_OK (0)
-#else /* only tested on Linux + glibc */
-# define STATIC_MTX_INIT_OK (1)
-#endif
static union padded_mutex mutexes[MUTEX_NR] = {
-#if STATIC_MTX_INIT_OK
[0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
-#endif
};
#ifdef static_assert
@@ -1010,12 +1003,6 @@ __attribute__((constructor)) static void mwrap_ctor(void)
ensure_initialization();
CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
- /*
- * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
- * which happens at pthread_mutex_lock, and that calls calloc
- */
- if (!STATIC_MTX_INIT_OK)
- reset_mutexes();
/* initialize mutexes used by urcu-bp */
CMM_STORE_SHARED(files, lfht_new(256));
if (!CMM_LOAD_SHARED(files))
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4/7] calloc: consolidate ENOMEM handling
2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
` (2 preceding siblings ...)
2022-12-19 11:19 ` [PATCH 3/7] core: drop FreeBSD-specific mutex initialization Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
2022-12-19 11:19 ` [PATCH 5/7] *alloc: limit scope of RCU lock Eric Wong
` (2 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
To: mwrap-perl
From: Eric Wong <mwrap-perl@80x24.org>
We can simplify the error paths in calloc and call memset() outside
of the RCU critical section.
---
mwrap_core.h | 22 +++++++++-------------
1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/mwrap_core.h b/mwrap_core.h
index b382018..14e5d79 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -662,8 +662,7 @@ void *malloc(size_t size)
p = hdr2ptr(h);
}
update_stats_rcu_unlock(l);
- if (caa_unlikely(!p)) errno = ENOMEM;
- return p;
+ if (p) return p;
enomem:
errno = ENOMEM;
return 0;
@@ -674,14 +673,10 @@ void *calloc(size_t nmemb, size_t size)
size_t asize;
size_t generation = 0;
- if (__builtin_mul_overflow(size, nmemb, &size)) {
- errno = ENOMEM;
- return 0;
- }
- if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
- errno = ENOMEM;
- return 0;
- }
+ if (__builtin_mul_overflow(size, nmemb, &size))
+ goto enomem;
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
+ goto enomem;
struct alloc_hdr *h;
SRC_LOC_BT(bt);
struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
@@ -689,11 +684,12 @@ void *calloc(size_t nmemb, size_t size)
if (p) {
alloc_insert_rcu(l, h, size, h, generation);
p = hdr2ptr(h);
- memset(p, 0, size);
}
update_stats_rcu_unlock(l);
- if (caa_unlikely(!p)) errno = ENOMEM;
- return p;
+ if (p) return memset(p, 0, size);
+enomem:
+ errno = ENOMEM;
+ return 0;
}
void *realloc(void *ptr, size_t size)
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 5/7] *alloc: limit scope of RCU lock
2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
` (3 preceding siblings ...)
2022-12-19 11:19 ` [PATCH 4/7] calloc: consolidate ENOMEM handling Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
2022-12-19 11:19 ` [PATCH 6/7] core: simplify callers of alloc_insert_rcu Eric Wong
2022-12-19 11:19 ` [PATCH 7/7] README: add some caveats (signalfd usage w/ current URCU) Eric Wong
6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
To: mwrap-perl
From: Eric Wong <mwrap-perl@80x24.org>
We can delay taking the RCU lock until the real_malloc() call
succeeds. This gives us accurate stats in case of ENOMEM
failures and reduces our LoC a bit, too.
---
mwrap_core.h | 62 ++++++++++++++++++++++++----------------------------
1 file changed, 28 insertions(+), 34 deletions(-)
diff --git a/mwrap_core.h b/mwrap_core.h
index 14e5d79..3161243 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -509,8 +509,6 @@ static void
alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size,
void *real, size_t generation)
{
- /* we need src_loc to remain alive for the duration of this call */
- if (!h) return;
h->size = size;
h->real = real;
h->as.live.loc = l;
@@ -540,11 +538,8 @@ static bool is_power_of_two(size_t n)
static int
mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl)
{
- struct src_loc *l;
- struct alloc_hdr *h;
void *real;
size_t asize;
- size_t generation = 0;
size_t d = alignment / sizeof(void*);
size_t r = alignment % sizeof(void*);
@@ -563,18 +558,18 @@ mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl)
__builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
return ENOMEM;
- l = update_stats_rcu_lock(&generation, size, sl);
-
real = real_malloc(asize);
if (real) {
void *p = hdr2ptr(real);
if (!ptr_is_aligned(p, alignment))
p = ptr_align(p, alignment);
- h = ptr2hdr(p);
- alloc_insert_rcu(l, h, size, real, generation);
+ struct alloc_hdr *h = ptr2hdr(p);
+ size_t gen = 0;
+ struct src_loc *l = update_stats_rcu_lock(&gen, size, sl);
+ alloc_insert_rcu(l, h, size, real, gen);
+ update_stats_rcu_unlock(l);
*pp = p;
}
- update_stats_rcu_unlock(l);
return real ? 0 : ENOMEM;
}
@@ -652,16 +647,16 @@ void *malloc(size_t size)
if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
goto enomem;
- size_t generation = 0;
- SRC_LOC_BT(bt);
- struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
- struct alloc_hdr *h;
- void *p = h = real_malloc(asize);
- if (h) {
- alloc_insert_rcu(l, h, size, h, generation);
+ void *p = real_malloc(asize);
+ if (p) {
+ size_t gen = 0;
+ SRC_LOC_BT(bt);
+ struct alloc_hdr *h = p;
+ struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
+ alloc_insert_rcu(l, h, size, h, gen);
+ update_stats_rcu_unlock(l);
p = hdr2ptr(h);
}
- update_stats_rcu_unlock(l);
if (p) return p;
enomem:
errno = ENOMEM;
@@ -671,22 +666,21 @@ enomem:
void *calloc(size_t nmemb, size_t size)
{
size_t asize;
- size_t generation = 0;
if (__builtin_mul_overflow(size, nmemb, &size))
goto enomem;
if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
goto enomem;
- struct alloc_hdr *h;
- SRC_LOC_BT(bt);
- struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
- void *p = h = real_malloc(asize);
+ void *p = real_malloc(asize);
if (p) {
- alloc_insert_rcu(l, h, size, h, generation);
- p = hdr2ptr(h);
+ size_t gen = 0;
+ struct alloc_hdr *h = p;
+ SRC_LOC_BT(bt);
+ struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
+ alloc_insert_rcu(l, h, size, h, gen);
+ update_stats_rcu_unlock(l);
+ return memset(hdr2ptr(h), 0, size);
}
- update_stats_rcu_unlock(l);
- if (p) return memset(p, 0, size);
enomem:
errno = ENOMEM;
return 0;
@@ -704,16 +698,16 @@ void *realloc(void *ptr, size_t size)
errno = ENOMEM;
return 0;
}
- struct alloc_hdr *h;
- size_t generation = 0;
- SRC_LOC_BT(bt);
- struct src_loc *l = update_stats_rcu_lock(&generation, size, &bt.sl);
- void *p = h = real_malloc(asize);
+ void *p = real_malloc(asize);
if (p) {
- alloc_insert_rcu(l, h, size, h, generation);
+ size_t gen = 0;
+ struct alloc_hdr *h = p;
+ SRC_LOC_BT(bt);
+ struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
+ alloc_insert_rcu(l, h, size, h, gen);
+ update_stats_rcu_unlock(l);
p = hdr2ptr(h);
}
- update_stats_rcu_unlock(l);
if (ptr && p) {
struct alloc_hdr *old = ptr2hdr(ptr);
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 6/7] core: simplify callers of alloc_insert_rcu
2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
` (4 preceding siblings ...)
2022-12-19 11:19 ` [PATCH 5/7] *alloc: limit scope of RCU lock Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
2022-12-19 11:19 ` [PATCH 7/7] README: add some caveats (signalfd usage w/ current URCU) Eric Wong
6 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
To: mwrap-perl
From: Eric Wong <mwrap-perl@80x24.org>
We can actually handle all RCU locking/unlocking inside
alloc_insert_rcu to simplify callers.
---
mwrap_core.h | 60 +++++++++++++++++++---------------------------------
1 file changed, 22 insertions(+), 38 deletions(-)
diff --git a/mwrap_core.h b/mwrap_core.h
index 3161243..b014f5d 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -319,11 +319,6 @@ again:
return l;
}
-static void update_stats_rcu_unlock(const struct src_loc *l)
-{
- if (caa_likely(l)) rcu_read_unlock();
-}
-
static const COP *mwp_curcop(void)
{
#if MWRAP_PERL
@@ -506,17 +501,20 @@ void free(void *p)
}
static void
-alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size,
- void *real, size_t generation)
+alloc_insert_rcu(struct src_loc *sl, struct alloc_hdr *h, size_t size,
+ void *real)
{
h->size = size;
h->real = real;
+ size_t gen = 0;
+ struct src_loc *l = update_stats_rcu_lock(&gen, size, sl);
h->as.live.loc = l;
- h->as.live.gen = generation;
+ h->as.live.gen = gen;
if (l) {
pthread_mutex_t *mtx = src_loc_mutex_lock(l);
cds_list_add_rcu(&h->anode, &l->allocs);
CHECK(int, 0, pthread_mutex_unlock(mtx));
+ rcu_read_unlock();
}
}
@@ -564,10 +562,7 @@ mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl)
if (!ptr_is_aligned(p, alignment))
p = ptr_align(p, alignment);
struct alloc_hdr *h = ptr2hdr(p);
- size_t gen = 0;
- struct src_loc *l = update_stats_rcu_lock(&gen, size, sl);
- alloc_insert_rcu(l, h, size, real, gen);
- update_stats_rcu_unlock(l);
+ alloc_insert_rcu(sl, h, size, real);
*pp = p;
}
@@ -649,15 +644,11 @@ void *malloc(size_t size)
void *p = real_malloc(asize);
if (p) {
- size_t gen = 0;
SRC_LOC_BT(bt);
struct alloc_hdr *h = p;
- struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
- alloc_insert_rcu(l, h, size, h, gen);
- update_stats_rcu_unlock(l);
- p = hdr2ptr(h);
+ alloc_insert_rcu(&bt.sl, h, size, h);
+ return hdr2ptr(h);
}
- if (p) return p;
enomem:
errno = ENOMEM;
return 0;
@@ -673,12 +664,9 @@ void *calloc(size_t nmemb, size_t size)
goto enomem;
void *p = real_malloc(asize);
if (p) {
- size_t gen = 0;
struct alloc_hdr *h = p;
SRC_LOC_BT(bt);
- struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
- alloc_insert_rcu(l, h, size, h, gen);
- update_stats_rcu_unlock(l);
+ alloc_insert_rcu(&bt.sl, h, size, h);
return memset(hdr2ptr(h), 0, size);
}
enomem:
@@ -694,28 +682,24 @@ void *realloc(void *ptr, size_t size)
free(ptr);
return 0;
}
- if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
- errno = ENOMEM;
- return 0;
- }
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
+ goto enomem;
void *p = real_malloc(asize);
if (p) {
- size_t gen = 0;
struct alloc_hdr *h = p;
SRC_LOC_BT(bt);
- struct src_loc *l = update_stats_rcu_lock(&gen, size, &bt.sl);
- alloc_insert_rcu(l, h, size, h, gen);
- update_stats_rcu_unlock(l);
+ alloc_insert_rcu(&bt.sl, h, size, h);
p = hdr2ptr(h);
+ if (ptr) {
+ struct alloc_hdr *old = ptr2hdr(ptr);
+ memcpy(p, ptr, old->size < size ? old->size : size);
+ free(ptr);
+ }
+ return p;
}
-
- if (ptr && p) {
- struct alloc_hdr *old = ptr2hdr(ptr);
- memcpy(p, ptr, old->size < size ? old->size : size);
- free(ptr);
- }
- if (caa_unlikely(!p)) errno = ENOMEM;
- return p;
+enomem:
+ errno = ENOMEM;
+ return 0;
}
struct dump_arg {
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 7/7] README: add some caveats (signalfd usage w/ current URCU)
2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
` (5 preceding siblings ...)
2022-12-19 11:19 ` [PATCH 6/7] core: simplify callers of alloc_insert_rcu Eric Wong
@ 2022-12-19 11:19 ` Eric Wong
2022-12-19 22:57 ` [WIP] attempt to workaround older URCU for signalfd Eric Wong
6 siblings, 1 reply; 9+ messages in thread
From: Eric Wong @ 2022-12-19 11:19 UTC (permalink / raw)
To: mwrap-perl
Fortunately, there aren't a lot of signalfd users right now
(except me :x). In any case, this problem should sort itself
out in a few years when new URCU releases replace old ones.
Also, only the EXTREMELY EXPENSIVE ENTERPRISE EDITION will be
able to support Perl files with over 16.7M lines in them :P
---
README | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/README b/README
index ba95d47..0acbe50 100644
--- a/README
+++ b/README
@@ -61,6 +61,11 @@ malloc locations.
* 32-bit machines are prone to overflow (WONTFIX)
+* signalfd(2)-reliant code will need latest URCU with commit
+ ea3a28a3f71dd02f (Disable signals in URCU background threads, 2022-09-23)
+
+* Perl source files over 16.7 million lines long are not supported :P
+
== Public mail archives (HTTP, Atom feeds, IMAP mailbox, NNTP group):
https://80x24.org/mwrap-perl/
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [WIP] attempt to workaround older URCU for signalfd
2022-12-19 11:19 ` [PATCH 7/7] README: add some caveats (signalfd usage w/ current URCU) Eric Wong
@ 2022-12-19 22:57 ` Eric Wong
0 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2022-12-19 22:57 UTC (permalink / raw)
To: mwrap-perl
Eric Wong <e@80x24.org> wrote:
> Fortunately, there aren't a lot of signalfd users right now
> (except me :x). In any case, this problem should sort itself
> out in a few years when new URCU releases replace old ones.
> +* signalfd(2)-reliant code will need latest URCU with commit
> + ea3a28a3f71dd02f (Disable signals in URCU background threads, 2022-09-23)
Fwiw, this was my attempt to work around current (released) URCU
for fork() users. Maybe I missed something, but I was still
getting one thread with signals unmasked after forking. Not
sure it's worth my time to check since I can easily backport
URCU commit ea3a28a3f71dd02f for my own use. (URCU is fairly
small and fast to build)
diff --git a/mwrap_core.h b/mwrap_core.h
index b014f5d..6d3d474 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -963,17 +963,32 @@ static struct src_loc *mwrap_get_bin(const char *buf, size_t len)
static const char *mwrap_env;
#include "httpd.h"
+/* force call_rcu to start background thread while signals are blocked */
+static void start_call_rcu(void)
+{
+ struct alloc_hdr *h = real_malloc(sizeof(struct alloc_hdr));
+ if (h) {
+ h->real = h;
+ call_rcu(&h->as.dead, free_hdr_rcu);
+ } else
+ fprintf(stderr, "malloc: %m\n");
+ struct cds_lfht *ht = lfht_new(1);
+ if (ht)
+ cds_lfht_destroy(ht, NULL);
+ else
+ fprintf(stderr, "cds_lfht_new: %m\n");
+}
+
__attribute__((constructor)) static void mwrap_ctor(void)
{
- sigset_t set, old;
- struct alloc_hdr *h;
mwrap_env = getenv("MWRAP");
++locating;
/* block signals */
- CHECK(int, 0, sigfillset(&set));
- CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &set, &old));
+ sigset_t blk, old;
+ CHECK(int, 0, sigfillset(&blk));
+ CHECK(int, 0, pthread_sigmask(SIG_BLOCK, &blk, &old));
ensure_initialization();
CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
@@ -984,13 +999,7 @@ __attribute__((constructor)) static void mwrap_ctor(void)
CMM_STORE_SHARED(totals, lfht_new(16384));
if (!CMM_LOAD_SHARED(totals))
fprintf(stderr, "failed to allocate totals table\n");
- h = real_malloc(sizeof(struct alloc_hdr));
- if (h) { /* force call_rcu to start background thread */
- h->real = h;
- call_rcu(&h->as.dead, free_hdr_rcu);
- } else
- fprintf(stderr, "malloc: %m\n");
-
+ start_call_rcu();
h1d_start();
CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL));
CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent,
diff --git a/mymalloc.h b/mymalloc.h
index 518cce3..e611485 100644
--- a/mymalloc.h
+++ b/mymalloc.h
@@ -41,6 +41,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
+#include <signal.h>
/* this is fine on most x86-64, especially with file-backed mmap(2) */
#define DEFAULT_GRANULARITY (64U * 1024U * 1024U)
@@ -126,6 +127,7 @@ ATTR_COLD static void mstate_tsd_dtor(void *p)
CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
}
+static void start_call_rcu(void);
/* see httpd.h */
static void h1d_atfork_prepare(void);
static void h1d_atfork_parent(void);
@@ -141,9 +143,14 @@ ATTR_COLD static void atfork_prepare(void)
ATTR_COLD static void atfork_parent(void)
{
CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+ sigset_t blk, old;
+ CHECK(int, 0, sigfillset(&blk));
+ CHECK(int, 0, pthread_sigmask(SIG_BLOCK, &blk, &old));
call_rcu_after_fork_parent();
CHECK(int, 0, pthread_mutex_lock(&global_mtx));
h1d_atfork_parent();
+ start_call_rcu();
+ CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL));
CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
}
@@ -166,8 +173,13 @@ ATTR_COLD static void atfork_child(void)
cds_list_add(&ms_tsd->arena_node, &arenas_active);
}
reset_mutexes();
+ sigset_t blk, old;
+ CHECK(int, 0, sigfillset(&blk));
+ CHECK(int, 0, pthread_sigmask(SIG_BLOCK, &blk, &old));
call_rcu_after_fork_child();
h1d_start();
+ start_call_rcu();
+ CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL));
}
#if defined(__GLIBC__)
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2022-12-19 22:57 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-19 11:19 [PATCH 0/7] shrinkage Eric Wong
2022-12-19 11:19 ` [PATCH 1/7] core: shrink src_loc 8 bytes on x86-64 Eric Wong
2022-12-19 11:19 ` [PATCH 2/7] core: shrink src_loc by another " Eric Wong
2022-12-19 11:19 ` [PATCH 3/7] core: drop FreeBSD-specific mutex initialization Eric Wong
2022-12-19 11:19 ` [PATCH 4/7] calloc: consolidate ENOMEM handling Eric Wong
2022-12-19 11:19 ` [PATCH 5/7] *alloc: limit scope of RCU lock Eric Wong
2022-12-19 11:19 ` [PATCH 6/7] core: simplify callers of alloc_insert_rcu Eric Wong
2022-12-19 11:19 ` [PATCH 7/7] README: add some caveats (signalfd usage w/ current URCU) Eric Wong
2022-12-19 22:57 ` [WIP] attempt to workaround older URCU for signalfd Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/mwrap-perl.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).