From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.1 (2015-04-28) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS16276 66.70.128.0/17 X-Spam-Status: No, score=-1.1 required=3.0 tests=BAYES_00,RCVD_IN_SORBS_WEB, RCVD_IN_XBL,SPF_FAIL,SPF_HELO_FAIL,TO_EQ_FM_DOM_SPF_FAIL shortcircuit=no autolearn=no autolearn_force=no version=3.4.1 Received: from 80x24.org (tor.cusse.org [66.70.217.179]) by dcvr.yhbt.net (Postfix) with ESMTP id 39BF01F516 for ; Sat, 23 Jun 2018 15:58:28 +0000 (UTC) From: Eric Wong To: spew@80x24.org Subject: [PATCH] hijack SIGCHLD handler for internal use Date: Sat, 23 Jun 2018 15:58:26 +0000 Message-Id: <20180623155826.29681-1-e@80x24.org> List-Id: Use a global SIGCHLD handler to guard all callers of rb_waitpid. To work safely with multi-threaded programs, we introduce a VM-wide waitpid_lock to be acquired BEFORE fork/vfork spawns the process. This is to be combined with the new ruby_waitpid_locked function used by mjit.c in a non-Ruby thread. LIFO ordering of the waiting_pids list matches Linux wait4 syscall behavior and ensures ordering of conflicting rb_waitpid calls remains unchanged. The disabling of SIGCHLD in rb_f_system is no longer necessary, as we use deferred signal handling and no longer make ANY blocking waitpid syscalls in other threads which could "beat" the waitpid call made by rb_f_system. Since this SIGCHLD handling relies on normal Ruby VM interrupt handling, we must continue to check VM interrupts during mjit_finish. Ruby-level SIGCHLD handlers registered with Signal.trap(:CHLD) continue to work as before and there should be no regressions in any existing use cases. 
--- mjit.c | 50 ++++++++++++--- process.c | 189 +++++++++++++++++++++++++++++++++++++++++++----------- signal.c | 40 ++++++++++-- thread.c | 2 + vm_core.h | 3 + 5 files changed, 232 insertions(+), 52 deletions(-) diff --git a/mjit.c b/mjit.c index 3dc34e8f45..55ff7e21ee 100644 --- a/mjit.c +++ b/mjit.c @@ -80,6 +80,7 @@ #include "constant.h" #include "id_table.h" #include "ruby_assert.h" +#include "ruby/thread.h" #include "ruby/util.h" #include "ruby/version.h" @@ -109,6 +110,9 @@ extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lo extern int rb_thread_create_mjit_thread(void (*child_hook)(void), void (*worker_func)(void)); + +pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options); + #define RB_CONDATTR_CLOCK_MONOTONIC 1 #ifdef _WIN32 @@ -380,21 +384,32 @@ exec_process(const char *path, char *const argv[]) { int stat, exit_code; pid_t pid; + rb_vm_t *vm = GET_VM(); + rb_nativethread_lock_lock(&vm->waitpid_lock); pid = start_process(path, argv); - if (pid <= 0) + if (pid <= 0) { + rb_nativethread_lock_unlock(&vm->waitpid_lock); return -2; - + } for (;;) { - waitpid(pid, &stat, 0); - if (WIFEXITED(stat)) { - exit_code = WEXITSTATUS(stat); - break; - } else if (WIFSIGNALED(stat)) { - exit_code = -1; + pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0); + if (r == -1) { + if (errno == EINTR) continue; + fprintf(stderr, "waitpid: %s\n", strerror(errno)); break; } + else if (r == pid) { + if (WIFEXITED(stat)) { + exit_code = WEXITSTATUS(stat); + break; + } else if (WIFSIGNALED(stat)) { + exit_code = -1; + break; + } + } } + rb_nativethread_lock_unlock(&vm->waitpid_lock); return exit_code; } @@ -1474,6 +1489,7 @@ stop_worker(void) CRITICAL_SECTION_START(3, "in stop_worker"); rb_native_cond_broadcast(&mjit_worker_wakeup); CRITICAL_SECTION_FINISH(3, "in stop_worker"); + RUBY_VM_CHECK_INTS(GET_EC()); } } @@ -1509,6 +1525,21 @@ mjit_resume(void) return Qtrue; } +static void * +wait_pch(void *ignored) +{ + 
rb_native_cond_wait(&mjit_pch_wakeup, &mjit_engine_mutex); + return 0; +} + +static void +ubf_pch(void *ignored) +{ + rb_native_mutex_lock(&mjit_engine_mutex); + rb_native_cond_signal(&mjit_pch_wakeup); + rb_native_mutex_unlock(&mjit_engine_mutex); +} + /* Finish the threads processing units and creating PCH, finalize and free MJIT data. It should be called last during MJIT life. */ @@ -1528,7 +1559,8 @@ mjit_finish(void) absence. So wait for a clean finish of the threads. */ while (pch_status == PCH_NOT_READY) { verbose(3, "Waiting wakeup from make_pch"); - rb_native_cond_wait(&mjit_pch_wakeup, &mjit_engine_mutex); + /* release GVL to handle interrupts */ + rb_thread_call_without_gvl(wait_pch, 0, ubf_pch, 0); } CRITICAL_SECTION_FINISH(3, "in mjit_finish to wakeup from pch"); diff --git a/process.c b/process.c index a60c0df16b..bdb92036b7 100644 --- a/process.c +++ b/process.c @@ -885,12 +885,6 @@ pst_wcoredump(VALUE st) #endif } -struct waitpid_arg { - rb_pid_t pid; - int flags; - int *st; -}; - static rb_pid_t do_waitpid(rb_pid_t pid, int *st, int flags) { @@ -903,25 +897,155 @@ do_waitpid(rb_pid_t pid, int *st, int flags) #endif } +struct waitpid_state { + struct list_node wnode; + rb_nativethread_cond_t cond; + rb_pid_t ret; + rb_pid_t pid; + int status; + int options; + int errnum; + rb_vm_t *vm; +}; + +void rb_native_cond_signal(rb_nativethread_cond_t *); +void rb_native_cond_wait(rb_nativethread_cond_t *, rb_nativethread_lock_t *); +void rb_native_cond_initialize(rb_nativethread_cond_t *); +void rb_native_cond_destroy(rb_nativethread_cond_t *); + +/* only called by vm->main_thread */ +void +rb_sigchld(rb_vm_t *vm) +{ + struct waitpid_state *w = 0, *next; + + rb_nativethread_lock_lock(&vm->waitpid_lock); + list_for_each_safe(&vm->waiting_pids, w, next, wnode) { + w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG); + if (w->ret == 0) continue; + if (w->ret == -1) w->errnum = errno; + list_del_init(&w->wnode); + rb_native_cond_signal(&w->cond); + } + 
rb_nativethread_lock_unlock(&vm->waitpid_lock); +} + +static void +waitpid_state_init(struct waitpid_state *w, rb_vm_t *vm, pid_t pid, int options) +{ + rb_native_cond_initialize(&w->cond); + w->ret = 0; + w->pid = pid; + w->status = 0; + w->options = options; + w->vm = vm; + list_node_init(&w->wnode); +} + +/* must be called with vm->waitpid_lock held, this is not interruptible */ +pid_t +ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options) +{ + struct waitpid_state w; + + assert(!ruby_thread_has_gvl_p() && "must not have GVL"); + + waitpid_state_init(&w, vm, pid, options); + w.ret = do_waitpid(w.pid, &w.status, w.options | WNOHANG); + if (w.ret) { + if (w.ret == -1) { + w.errnum = errno; + } + } + else { + list_add(&vm->waiting_pids, &w.wnode); + while (!w.ret) { + rb_native_cond_wait(&w.cond, &vm->waitpid_lock); + } + list_del(&w.wnode); + } + if (status) { + *status = w.status; + } + rb_native_cond_destroy(&w.cond); + errno = w.errnum; + return w.ret; +} + +static void +waitpid_ubf(void *x) +{ + struct waitpid_state *w = x; + rb_nativethread_lock_lock(&w->vm->waitpid_lock); + if (!w->ret) { + w->errnum = EINTR; + w->ret = -1; + } + rb_native_cond_signal(&w->cond); + rb_nativethread_lock_unlock(&w->vm->waitpid_lock); +} + static void * -rb_waitpid_blocking(void *data) +waitpid_nogvl(void *x) { - struct waitpid_arg *arg = data; - rb_pid_t result = do_waitpid(arg->pid, arg->st, arg->flags); - return (void *)(VALUE)result; + struct waitpid_state *w = x; + + /* let rb_sigchld handle it */ + rb_native_cond_wait(&w->cond, &w->vm->waitpid_lock); + + return 0; } -static rb_pid_t -do_waitpid_nonblocking(rb_pid_t pid, int *st, int flags) +static VALUE +waitpid_wait(VALUE x) +{ + struct waitpid_state *w = (struct waitpid_state *)x; + + rb_nativethread_lock_lock(&w->vm->waitpid_lock); + w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG); + if (w->ret) { + if (w->ret == -1) { + w->errnum = errno; + } + } + else { + rb_execution_context_t *ec = 
GET_EC(); + + list_add(&w->vm->waiting_pids, &w->wnode); + do { + rb_thread_call_without_gvl2(waitpid_nogvl, w, waitpid_ubf, w); + if (RUBY_VM_INTERRUPTED_ANY(ec) || + (w->ret == -1 && w->errnum == EINTR)) { + rb_nativethread_lock_unlock(&w->vm->waitpid_lock); + + RUBY_VM_CHECK_INTS(ec); + + rb_nativethread_lock_lock(&w->vm->waitpid_lock); + if (w->ret == -1 && w->errnum == EINTR) { + w->ret = do_waitpid(w->pid, &w->status, w->options|WNOHANG); + if (w->ret == -1) + w->errnum = errno; + } + } + } while (!w->ret); + } + rb_nativethread_lock_unlock(&w->vm->waitpid_lock); + return Qfalse; +} + +static VALUE +waitpid_ensure(VALUE x) { - void *result; - struct waitpid_arg arg; - arg.pid = pid; - arg.st = st; - arg.flags = flags; - result = rb_thread_call_without_gvl(rb_waitpid_blocking, &arg, - RUBY_UBF_PROCESS, 0); - return (rb_pid_t)(VALUE)result; + struct waitpid_state *w = (struct waitpid_state *)x; + + if (w->ret <= 0) { + rb_nativethread_lock_lock(&w->vm->waitpid_lock); + list_del_init(&w->wnode); + rb_nativethread_lock_unlock(&w->vm->waitpid_lock); + } + + rb_native_cond_destroy(&w->cond); + return Qfalse; } rb_pid_t @@ -933,10 +1057,14 @@ rb_waitpid(rb_pid_t pid, int *st, int flags) result = do_waitpid(pid, st, flags); } else { - while ((result = do_waitpid_nonblocking(pid, st, flags)) < 0 && - (errno == EINTR)) { - RUBY_VM_CHECK_INTS(GET_EC()); - } + struct waitpid_state w; + + waitpid_state_init(&w, GET_VM(), pid, flags); + rb_ensure(waitpid_wait, (VALUE)&w, waitpid_ensure, (VALUE)&w); + if (st) { + *st = w.status; + } + result = w.ret; } if (result > 0) { rb_last_status_set(*st, result); @@ -4081,16 +4209,6 @@ rb_f_system(int argc, VALUE *argv) VALUE execarg_obj; struct rb_execarg *eargp; -#if defined(SIGCLD) && !defined(SIGCHLD) -# define SIGCHLD SIGCLD -#endif - -#ifdef SIGCHLD - RETSIGTYPE (*chfunc)(int); - - rb_last_status_clear(); - chfunc = signal(SIGCHLD, SIG_DFL); -#endif execarg_obj = rb_execarg_new(argc, argv, TRUE, TRUE); pid = 
rb_execarg_spawn(execarg_obj, NULL, 0); #if defined(HAVE_WORKING_FORK) || defined(HAVE_SPAWNV) @@ -4100,9 +4218,6 @@ rb_f_system(int argc, VALUE *argv) if (ret == (rb_pid_t)-1) rb_sys_fail("Another thread waited the process started by system()."); } -#endif -#ifdef SIGCHLD - signal(SIGCHLD, chfunc); #endif TypedData_Get_Struct(execarg_obj, struct rb_execarg, &exec_arg_data_type, eargp); if (pid < 0) { diff --git a/signal.c b/signal.c index c781c38c62..c20b01ea36 100644 --- a/signal.c +++ b/signal.c @@ -1052,6 +1052,19 @@ rb_trap_exit(void) } } +static int +sig_is_chld(int sig) +{ +#if defined(SIGCLD) + return (sig == SIGCLD); +#elif defined(SIGCHLD) + return (sig == SIGCHLD); +#endif + return 0; +} + +void rb_sigchld(rb_vm_t *); /* process.c */ + void rb_signal_exec(rb_thread_t *th, int sig) { @@ -1059,6 +1072,9 @@ rb_signal_exec(rb_thread_t *th, int sig) VALUE cmd = vm->trap_list.cmd[sig]; int safe = vm->trap_list.safe[sig]; + if (sig_is_chld(sig)) { + rb_sigchld(vm); + } if (cmd == 0) { switch (sig) { case SIGINT: @@ -1117,6 +1133,11 @@ default_handler(int sig) #endif #ifdef SIGUSR2 case SIGUSR2: +#endif +#ifdef SIGCLD + case SIGCLD: +#elif defined(SIGCHLD) + case SIGCHLD: #endif func = sighandler; break; @@ -1155,6 +1176,9 @@ trap_handler(VALUE *cmd, int sig) VALUE command; if (NIL_P(*cmd)) { + if (sig_is_chld(sig)) { + goto sig_dfl; + } func = SIG_IGN; } else { @@ -1175,6 +1199,9 @@ trap_handler(VALUE *cmd, int sig) break; case 14: if (memcmp(cptr, "SYSTEM_DEFAULT", 14) == 0) { + if (sig_is_chld(sig)) { + goto sig_dfl; + } func = SIG_DFL; *cmd = 0; } @@ -1182,6 +1209,9 @@ trap_handler(VALUE *cmd, int sig) case 7: if (memcmp(cptr, "SIG_IGN", 7) == 0) { sig_ign: + if (sig_is_chld(sig)) { + goto sig_dfl; + } func = SIG_IGN; *cmd = Qtrue; } @@ -1418,15 +1448,13 @@ static int init_sigchld(int sig) { sighandler_t oldfunc; + sighandler_t func = sighandler; oldfunc = ruby_signal(sig, SIG_DFL); if (oldfunc == SIG_ERR) return -1; - if (oldfunc != SIG_DFL && oldfunc != 
SIG_IGN) { - ruby_signal(sig, oldfunc); - } - else { - GET_VM()->trap_list.cmd[sig] = 0; - } + ruby_signal(sig, func); + GET_VM()->trap_list.cmd[sig] = 0; + return 0; } diff --git a/thread.c b/thread.c index 5a5e32379d..8c9aafe07a 100644 --- a/thread.c +++ b/thread.c @@ -413,6 +413,7 @@ rb_vm_gvl_destroy(rb_vm_t *vm) gvl_release(vm); gvl_destroy(vm); rb_native_mutex_destroy(&vm->thread_destruct_lock); + rb_native_mutex_destroy(&vm->waitpid_lock); } void @@ -4999,6 +5000,7 @@ Init_Thread(void) gvl_init(th->vm); gvl_acquire(th->vm, th); rb_native_mutex_initialize(&th->vm->thread_destruct_lock); + rb_native_mutex_initialize(&th->vm->waitpid_lock); rb_native_mutex_initialize(&th->interrupt_lock); th->pending_interrupt_queue = rb_ary_tmp_new(0); diff --git a/vm_core.h b/vm_core.h index ee151195d5..d0689e5ba6 100644 --- a/vm_core.h +++ b/vm_core.h @@ -553,6 +553,8 @@ typedef struct rb_vm_struct { #endif rb_serial_t fork_gen; + rb_nativethread_lock_t waitpid_lock; + struct list_head waiting_pids; /* <=> struct waitpid_state */ struct list_head waiting_fds; /* <=> struct waiting_fd */ struct list_head living_threads; VALUE thgroup_default; @@ -1561,6 +1563,7 @@ static inline void rb_vm_living_threads_init(rb_vm_t *vm) { list_head_init(&vm->waiting_fds); + list_head_init(&vm->waiting_pids); list_head_init(&vm->living_threads); vm->living_thread_num = 0; } -- EW