From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Cc: Eric Wong <normalperson@yhbt.net>
Subject: [PATCH 2/8] fix SIGCHLD hijacking race conditions
Date: Mon, 25 Jun 2018 23:50:45 +0000 [thread overview]
Message-ID: <20180625235051.66045-3-e@80x24.org> (raw)
In-Reply-To: <20180625235051.66045-1-e@80x24.org>
From: Eric Wong <normalperson@yhbt.net>
It's a bit tricky, and I introduced yet another sleep function
(rb_thread_sleep_interruptible) to make it work.
The original problem with my patch was that holding a native
mutex across fork is dangerous, so I switched to normal Ruby
sleep methods (as used by Mutex and autoload).
However, it is impossible to resume the main thread from a signal
handler because rb_threadptr_execute_interrupts always restores
th->status after running trap handlers. In other words, this does not work:
main = Thread.current
trap(:USR1) { main.run }
Thread.new { sleep 0.1; Process.kill(:USR1, $$) }
sleep
So I tried moving rb_sigchld/rb_waitpid_all into the timer thread;
but that is racy and not doable unless the Ruby thread holds
a native mutex while sleeping on a native condvar (without the GVL).
And that brings us back to the original problem...
(Well, I could use futex :P)
Now, back to using the main thread to handle SIGCHLD...
It seems to be working...
---
mjit.c | 13 ++++--
process.c | 156 +++++++++++++++++++++++++++++---------------------------------
signal.c | 66 +++++++++++---------------
thread.c | 11 +++++
4 files changed, 120 insertions(+), 126 deletions(-)
diff --git a/mjit.c b/mjit.c
index 4aa1524a55..63cd35da21 100644
--- a/mjit.c
+++ b/mjit.c
@@ -120,7 +120,8 @@ extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lo
extern int rb_thread_create_mjit_thread(void (*child_hook)(void), void (*worker_func)(void));
-pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options);
+pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options,
+ rb_nativethread_cond_t *cond);
#define RB_CONDATTR_CLOCK_MONOTONIC 1
@@ -408,6 +409,7 @@ exec_process(const char *path, char *const argv[])
int stat, exit_code;
pid_t pid;
rb_vm_t *vm = GET_VM();
+ rb_nativethread_cond_t cond;
rb_nativethread_lock_lock(&vm->waitpid_lock);
pid = start_process(path, argv);
@@ -415,11 +417,13 @@ exec_process(const char *path, char *const argv[])
rb_nativethread_lock_unlock(&vm->waitpid_lock);
return -2;
}
+ rb_native_cond_initialize(&cond);
for (;;) {
- pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0);
+ pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0, &cond);
if (r == -1) {
- if (errno == EINTR) continue;
+ if (errno == EINTR) continue; /* should never happen */
fprintf(stderr, "waitpid: %s\n", strerror(errno));
+ exit_code = -2;
break;
}
else if (r == pid) {
@@ -433,6 +437,7 @@ exec_process(const char *path, char *const argv[])
}
}
rb_nativethread_lock_unlock(&vm->waitpid_lock);
+ rb_native_cond_destroy(&cond);
return exit_code;
}
@@ -1582,7 +1587,7 @@ mjit_finish(void)
absence. So wait for a clean finish of the threads. */
while (pch_status == PCH_NOT_READY) {
verbose(3, "Waiting wakeup from make_pch");
- /* release GVL to handle interrupts */
+ /* release GVL to handle interrupts */
rb_thread_call_without_gvl(wait_pch, 0, ubf_pch, 0);
}
CRITICAL_SECTION_FINISH(3, "in mjit_finish to wakeup from pch");
diff --git a/process.c b/process.c
index c92351b83d..9f0995e194 100644
--- a/process.c
+++ b/process.c
@@ -899,153 +899,143 @@ do_waitpid(rb_pid_t pid, int *st, int flags)
struct waitpid_state {
struct list_node wnode;
- rb_nativethread_cond_t cond;
+ union {
+ rb_nativethread_cond_t *cond; /* non-Ruby thread */
+ rb_execution_context_t *ec; /* normal Ruby execution context */
+ } wake;
rb_pid_t ret;
rb_pid_t pid;
int status;
int options;
int errnum;
- rb_vm_t *vm;
+ unsigned int is_ruby : 1;
};
+void rb_native_mutex_lock(rb_nativethread_lock_t *);
+void rb_native_mutex_unlock(rb_nativethread_lock_t *);
void rb_native_cond_signal(rb_nativethread_cond_t *);
void rb_native_cond_wait(rb_nativethread_cond_t *, rb_nativethread_lock_t *);
-void rb_native_cond_initialize(rb_nativethread_cond_t *);
-void rb_native_cond_destroy(rb_nativethread_cond_t *);
-/* only called by vm->main_thread */
+/* called by vm->main_thread */
void
-rb_sigchld(rb_vm_t *vm)
+rb_waitpid_all(rb_vm_t *vm)
{
struct waitpid_state *w = 0, *next;
- rb_nativethread_lock_lock(&vm->waitpid_lock);
+ rb_native_mutex_lock(&vm->waitpid_lock);
list_for_each_safe(&vm->waiting_pids, w, next, wnode) {
w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+
if (w->ret == 0) continue;
if (w->ret == -1) w->errnum = errno;
+
list_del_init(&w->wnode);
- rb_native_cond_signal(&w->cond);
+ if (w->is_ruby) {
+ rb_thread_wakeup_alive(rb_ec_thread_ptr(w->wake.ec)->self);
+ }
+ else {
+ rb_native_cond_signal(w->wake.cond);
+ }
}
- rb_nativethread_lock_unlock(&vm->waitpid_lock);
+ rb_native_mutex_unlock(&vm->waitpid_lock);
}
static void
-waitpid_state_init(struct waitpid_state *w, rb_vm_t *vm, pid_t pid, int options)
+waitpid_state_init(struct waitpid_state *w, pid_t pid, int options)
{
- rb_native_cond_initialize(&w->cond);
w->ret = 0;
w->pid = pid;
- w->status = 0;
w->options = options;
- w->vm = vm;
list_node_init(&w->wnode);
}
-/* must be called with vm->waitpid_lock held, this is not interruptible */
+/*
+ * must be called with vm->waitpid_lock held, this is not interruptible
+ */
pid_t
-ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options)
+ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options,
+ rb_nativethread_cond_t *cond)
{
struct waitpid_state w;
assert(!ruby_thread_has_gvl_p() && "must not have GVL");
- waitpid_state_init(&w, vm, pid, options);
+ waitpid_state_init(&w, pid, options);
+ w.is_ruby = 0;
w.ret = do_waitpid(w.pid, &w.status, w.options | WNOHANG);
if (w.ret) {
- if (w.ret == -1) {
- w.errnum = errno;
- }
+ if (w.ret == -1) w.errnum = errno;
}
else {
+ w.wake.cond = cond;
list_add(&vm->waiting_pids, &w.wnode);
- while (!w.ret) {
- rb_native_cond_wait(&w.cond, &vm->waitpid_lock);
- }
+ do {
+ rb_native_cond_wait(w.wake.cond, &vm->waitpid_lock);
+ } while (!w.ret);
list_del(&w.wnode);
}
if (status) {
*status = w.status;
}
- rb_native_cond_destroy(&w.cond);
errno = w.errnum;
return w.ret;
}
-static void
-waitpid_ubf(void *x)
-{
- struct waitpid_state *w = x;
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- if (!w->ret) {
- w->errnum = EINTR;
- w->ret = -1;
- }
- rb_native_cond_signal(&w->cond);
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-}
+void rb_thread_sleep_interruptible(struct timespec *ts); /* thread.c */
-static void *
-waitpid_nogvl(void *x)
+static VALUE
+waitpid_sleep(VALUE x)
{
- struct waitpid_state *w = x;
+ struct waitpid_state *w = (struct waitpid_state *)x;
- /* let rb_sigchld handle it */
- rb_native_cond_wait(&w->cond, &w->vm->waitpid_lock);
+ rb_thread_check_ints();
+ while (!w->ret) {
+ rb_thread_sleep_interruptible(0);
+ rb_thread_check_ints();
+ }
- return 0;
+ return Qfalse;
}
static VALUE
-waitpid_wait(VALUE x)
+waitpid_ensure(VALUE x)
{
struct waitpid_state *w = (struct waitpid_state *)x;
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
- if (w->ret) {
- if (w->ret == -1) {
- w->errnum = errno;
- }
- }
- else {
- rb_execution_context_t *ec = GET_EC();
+ if (w->ret == 0) {
+ rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
- list_add(&w->vm->waiting_pids, &w->wnode);
- do {
- rb_thread_call_without_gvl2(waitpid_nogvl, w, waitpid_ubf, w);
- if (RUBY_VM_INTERRUPTED_ANY(ec) ||
- (w->ret == -1 && w->errnum == EINTR)) {
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-
- RUBY_VM_CHECK_INTS(ec);
-
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- if (w->ret == -1 && w->errnum == EINTR) {
- w->ret = do_waitpid(w->pid, &w->status, w->options|WNOHANG);
- if (w->ret == -1)
- w->errnum = errno;
- }
- }
- } while (!w->ret);
+ rb_native_mutex_lock(&vm->waitpid_lock);
+ list_del(&w->wnode);
+ rb_native_mutex_unlock(&vm->waitpid_lock);
}
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
return Qfalse;
}
-static VALUE
-waitpid_ensure(VALUE x)
+static void
+waitpid_wait(struct waitpid_state *w)
{
- struct waitpid_state *w = (struct waitpid_state *)x;
+ rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
- if (w->ret <= 0) {
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- list_del_init(&w->wnode);
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
+ /*
+ * Lock here to prevent do_waitpid from stealing work from the
+ * ruby_waitpid_locked done by mjit workers since mjit works
+ * outside of GVL
+ */
+ rb_native_mutex_lock(&vm->waitpid_lock);
+
+ w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+ if (w->ret) {
+ if (w->ret == -1) w->errnum = errno;
+
+ rb_native_mutex_unlock(&vm->waitpid_lock);
}
+ else {
+ list_add(&vm->waiting_pids, &w->wnode);
+ rb_native_mutex_unlock(&vm->waitpid_lock);
- rb_native_cond_destroy(&w->cond);
- return Qfalse;
+ rb_ensure(waitpid_sleep, (VALUE)w, waitpid_ensure, (VALUE)w);
+ }
}
rb_pid_t
@@ -1059,11 +1049,11 @@ rb_waitpid(rb_pid_t pid, int *st, int flags)
else {
struct waitpid_state w;
- waitpid_state_init(&w, GET_VM(), pid, flags);
- rb_ensure(waitpid_wait, (VALUE)&w, waitpid_ensure, (VALUE)&w);
- if (st) {
- *st = w.status;
- }
+ waitpid_state_init(&w, pid, flags);
+ w.is_ruby = 1;
+ w.wake.ec = GET_EC();
+ waitpid_wait(&w);
+ if (st) *st = w.status;
result = w.ret;
}
if (result > 0) {
diff --git a/signal.c b/signal.c
index c20b01ea36..22fc4286d9 100644
--- a/signal.c
+++ b/signal.c
@@ -66,6 +66,14 @@ ruby_atomic_compare_and_swap(rb_atomic_t *ptr, rb_atomic_t cmp,
# define NSIG (_SIGMAX + 1) /* For QNX */
#endif
+#if defined(SIGCLD)
+# define RUBY_SIGCHLD (SIGCLD)
+#elif defined(SIGCHLD)
+# define RUBY_SIGCHLD (SIGCHLD)
+#else
+# define RUBY_SIGCHLD (0)
+#endif
+
static const struct signals {
const char *signm;
int signo;
@@ -129,15 +137,9 @@ static const struct signals {
#ifdef SIGCONT
{"CONT", SIGCONT},
#endif
-#ifdef SIGCHLD
- {"CHLD", SIGCHLD},
-#endif
-#ifdef SIGCLD
- {"CLD", SIGCLD},
-#else
-# ifdef SIGCHLD
- {"CLD", SIGCHLD},
-# endif
+#if RUBY_SIGCHLD
+ {"CHLD", RUBY_SIGCHLD },
+ {"CLD", RUBY_SIGCHLD },
#endif
#ifdef SIGTTIN
{"TTIN", SIGTTIN},
@@ -1052,18 +1054,7 @@ rb_trap_exit(void)
}
}
-static int
-sig_is_chld(int sig)
-{
-#if defined(SIGCLD)
- return (sig == SIGCLD);
-#elif defined(SIGCHLD)
- return (sig == SIGCHLD);
-#endif
- return 0;
-}
-
-void rb_sigchld(rb_vm_t *); /* process.c */
+void rb_waitpid_all(rb_vm_t *); /* process.c */
void
rb_signal_exec(rb_thread_t *th, int sig)
@@ -1072,9 +1063,10 @@ rb_signal_exec(rb_thread_t *th, int sig)
VALUE cmd = vm->trap_list.cmd[sig];
int safe = vm->trap_list.safe[sig];
- if (sig_is_chld(sig)) {
- rb_sigchld(vm);
+ if (sig == RUBY_SIGCHLD) {
+ rb_waitpid_all(vm);
}
+
if (cmd == 0) {
switch (sig) {
case SIGINT:
@@ -1134,10 +1126,8 @@ default_handler(int sig)
#ifdef SIGUSR2
case SIGUSR2:
#endif
-#ifdef SIGCLD
- case SIGCLD:
-#elif defined(SIGCHLD)
- case SIGCHLD:
+#if RUBY_SIGCHLD
+ case RUBY_SIGCHLD:
#endif
func = sighandler;
break;
@@ -1176,7 +1166,7 @@ trap_handler(VALUE *cmd, int sig)
VALUE command;
if (NIL_P(*cmd)) {
- if (sig_is_chld(sig)) {
+ if (sig == RUBY_SIGCHLD) {
goto sig_dfl;
}
func = SIG_IGN;
@@ -1199,9 +1189,9 @@ trap_handler(VALUE *cmd, int sig)
break;
case 14:
if (memcmp(cptr, "SYSTEM_DEFAULT", 14) == 0) {
- if (sig_is_chld(sig)) {
- goto sig_dfl;
- }
+ if (sig == RUBY_SIGCHLD) {
+ goto sig_dfl;
+ }
func = SIG_DFL;
*cmd = 0;
}
@@ -1209,9 +1199,9 @@ trap_handler(VALUE *cmd, int sig)
case 7:
if (memcmp(cptr, "SIG_IGN", 7) == 0) {
sig_ign:
- if (sig_is_chld(sig)) {
- goto sig_dfl;
- }
+ if (sig == RUBY_SIGCHLD) {
+ goto sig_dfl;
+ }
func = SIG_IGN;
*cmd = Qtrue;
}
@@ -1443,7 +1433,7 @@ install_sighandler(int signum, sighandler_t handler)
# define install_sighandler(signum, handler) \
INSTALL_SIGHANDLER(install_sighandler(signum, handler), #signum, signum)
-#if defined(SIGCLD) || defined(SIGCHLD)
+#if RUBY_SIGCHLD
static int
init_sigchld(int sig)
{
@@ -1570,10 +1560,8 @@ Init_signal(void)
install_sighandler(SIGSYS, sig_do_nothing);
#endif
-#if defined(SIGCLD)
- init_sigchld(SIGCLD);
-#elif defined(SIGCHLD)
- init_sigchld(SIGCHLD);
+#if RUBY_SIGCHLD
+ init_sigchld(RUBY_SIGCHLD);
#endif
rb_enable_interrupt();
diff --git a/thread.c b/thread.c
index 8c9aafe07a..ab27c60632 100644
--- a/thread.c
+++ b/thread.c
@@ -1287,6 +1287,17 @@ rb_thread_sleep_forever(void)
sleep_forever(GET_THREAD(), SLEEP_SPURIOUS_CHECK);
}
+void
+rb_thread_sleep_interruptible(struct timespec *ts)
+{
+ rb_thread_t *th = GET_THREAD();
+ enum rb_thread_status prev_status = th->status;
+
+ th->status = THREAD_STOPPED;
+ native_sleep(th, ts);
+ th->status = prev_status;
+}
+
void
rb_thread_sleep_deadly(void)
{
--
EW
next prev parent reply other threads:[~2018-06-25 23:50 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-06-25 23:50 [PATCHv3 0/8] mjit SIGCHLD hijack series Eric Wong
2018-06-25 23:50 ` [PATCH 1/8] hijack SIGCHLD handler for internal use Eric Wong
2018-06-25 23:50 ` Eric Wong [this message]
2018-06-25 23:50 ` [PATCH 3/8] mjit.c: allow working on platforms without SIGCHLD Eric Wong
2018-06-25 23:50 ` [PATCH 4/8] cleanups Eric Wong
2018-06-25 23:50 ` [PATCH 5/8] handle SIGCHLD in both the timer-thread and main thread Eric Wong
2018-06-25 23:50 ` [PATCH 6/8] Revert "test_process.rb: skip tests for Bug 14867" Eric Wong
2018-06-25 23:50 ` [PATCH 7/8] Revert "spec: skip Process wait specs on MJIT" Eric Wong
2018-06-25 23:50 ` [PATCH 8/8] wip testing Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180625235051.66045-3-e@80x24.org \
--to=e@80x24.org \
--cc=normalperson@yhbt.net \
--cc=spew@80x24.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).