From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] hijack SIGCHLD handler for internal use
Date: Sat, 23 Jun 2018 15:58:26 +0000
Message-ID: <20180623155826.29681-1-e@80x24.org>

Use a global SIGCHLD handler to guard all callers of rb_waitpid.
To work safely with multi-threaded programs, we introduce a
VM-wide waitpid_lock to be acquired BEFORE fork/vfork spawns the
process.  This is to be combined with the new ruby_waitpid_locked
function used by mjit.c in a non-Ruby thread.
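
Roughly, the ordering guarantee looks like the following standalone
sketch, written with plain pthreads rather than the VM's
rb_nativethread_lock_t; spawn_locked and the reaper it refers to are
illustrative, not code from this patch:

    /*
     * The lock is taken BEFORE fork(); a SIGCHLD reaper (not shown)
     * takes the same lock, so it can never run for a pid the
     * spawning thread has not finished registering.
     */
    #include <pthread.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <stdio.h>

    static pthread_mutex_t waitpid_lock = PTHREAD_MUTEX_INITIALIZER;

    static pid_t
    spawn_locked(char *const argv[])
    {
        pid_t pid;

        pthread_mutex_lock(&waitpid_lock);      /* BEFORE fork/vfork */
        pid = fork();
        if (pid == 0) {                         /* child */
            execvp(argv[0], argv);
            _exit(127);
        }
        /* parent: register pid for the reaper here, still locked */
        pthread_mutex_unlock(&waitpid_lock);
        return pid;
    }

    int
    main(void)
    {
        char *argv[] = { "true", NULL };
        pid_t pid = spawn_locked(argv);
        int status;

        if (pid > 0 && waitpid(pid, &status, 0) == pid)
            printf("reaped %d, status %d\n", (int)pid, status);
        return 0;
    }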

LIFO ordering of the waiting_pids list matches Linux wait4
syscall behavior and ensures ordering of conflicting
rb_waitpid calls remains unchanged.
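
A toy model of that rationale (plain pointers instead of ccan/list;
push_waiter and struct waiter are names made up for this example):
list_add() prepends and the reap walk starts at the head, so the
newest waiter for a given pid sees the status first.

    #include <stdio.h>

    struct waiter {
        struct waiter *next;
        int pid;
        const char *who;
    };

    static struct waiter *head;

    static void
    push_waiter(struct waiter *w)       /* LIFO prepend, like list_add() */
    {
        w->next = head;
        head = w;
    }

    int
    main(void)
    {
        struct waiter a = { NULL, 42, "first rb_waitpid caller" };
        struct waiter b = { NULL, 42, "second rb_waitpid caller" };
        struct waiter *w;

        push_waiter(&a);
        push_waiter(&b);
        for (w = head; w; w = w->next)  /* reap order: newest first */
            printf("pid %d -> %s\n", w->pid, w->who);
        return 0;
    }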

The disabling of SIGCHLD in rb_f_system is no longer necessary,
as we use deferred signal handling and no longer make ANY
blocking waitpid syscalls in other threads which could "beat"
the waitpid call made by rb_f_system.
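
In general terms, deferred handling looks like the sketch below,
which uses plain POSIX signal APIs rather than the VM's trap
machinery: the handler only records that SIGCHLD fired, and
waitpid runs later from ordinary context.

    #include <signal.h>
    #include <string.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <stdio.h>

    static volatile sig_atomic_t sigchld_pending;

    static void
    on_sigchld(int sig)
    {
        (void)sig;
        sigchld_pending = 1;            /* defer: no waitpid() here */
    }

    int
    main(void)
    {
        struct sigaction sa;
        sigset_t block, old;
        int status;
        pid_t pid;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_sigchld;
        sigaction(SIGCHLD, &sa, NULL);

        sigemptyset(&block);
        sigaddset(&block, SIGCHLD);
        sigprocmask(SIG_BLOCK, &block, &old);   /* avoid lost wakeups */

        if ((pid = fork()) == 0)
            _exit(0);                           /* child */

        while (!sigchld_pending)
            sigsuspend(&old);   /* atomically unblock + wait for SIGCHLD */

        if (waitpid(pid, &status, WNOHANG) == pid)
            printf("reaped %d from normal context\n", (int)pid);
        return 0;
    }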

Since this SIGCHLD handling relies on normal Ruby VM interrupt
handling, we must continue to check VM interrupts during
mjit_finish.
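
As an illustration of the pattern mjit_finish switches to, here is a
C-extension-style sketch.  rb_thread_call_without_gvl and
rb_thread_check_ints are existing C-API functions; wait_once,
wake_waiter and the `ready' flag are made up for this example, and
some other native thread is assumed to set `ready' and signal the
condvar:

    #include <pthread.h>
    #include <stdint.h>
    #include <ruby.h>
    #include <ruby/thread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static int ready;

    static void *
    wait_once(void *ignored)            /* runs without the GVL */
    {
        int done;

        (void)ignored;
        pthread_mutex_lock(&lock);
        if (!ready)
            pthread_cond_wait(&cond, &lock); /* ubf wakeups return here too */
        done = ready;
        pthread_mutex_unlock(&lock);
        return (void *)(intptr_t)done;
    }

    static void
    wake_waiter(void *ignored)          /* ubf: called on VM interrupt */
    {
        (void)ignored;
        pthread_mutex_lock(&lock);
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&lock);
    }

    static VALUE
    wait_until_ready(VALUE self)
    {
        (void)self;
        for (;;) {
            void *done = rb_thread_call_without_gvl(wait_once, NULL,
                                                    wake_waiter, NULL);
            rb_thread_check_ints();     /* deferred SIGCHLD runs here */
            if (done) return Qnil;
        }
    }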

Ruby-level SIGCHLD handlers registered with Signal.trap(:CHLD)
continue to work as before, and there should be no regressions
in any existing use cases.
---
 mjit.c    |  50 ++++++++++++---
 process.c | 189 +++++++++++++++++++++++++++++++++++++++++++-----------
 signal.c  |  40 ++++++++++--
 thread.c  |   2 +
 vm_core.h |   3 +
 5 files changed, 232 insertions(+), 52 deletions(-)

diff --git a/mjit.c b/mjit.c
index 3dc34e8f45..55ff7e21ee 100644
--- a/mjit.c
+++ b/mjit.c
@@ -80,6 +80,7 @@
 #include "constant.h"
 #include "id_table.h"
 #include "ruby_assert.h"
+#include "ruby/thread.h"
 #include "ruby/util.h"
 #include "ruby/version.h"
 
@@ -109,6 +110,9 @@ extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lo
 
 extern int rb_thread_create_mjit_thread(void (*child_hook)(void), void (*worker_func)(void));
 
+
+pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options);
+
 #define RB_CONDATTR_CLOCK_MONOTONIC 1
 
 #ifdef _WIN32
@@ -380,21 +384,32 @@ exec_process(const char *path, char *const argv[])
 {
     int stat, exit_code;
     pid_t pid;
+    rb_vm_t *vm = GET_VM();
 
+    rb_nativethread_lock_lock(&vm->waitpid_lock);
     pid = start_process(path, argv);
-    if (pid <= 0)
+    if (pid <= 0) {
+        rb_nativethread_lock_unlock(&vm->waitpid_lock);
         return -2;
-
+    }
     for (;;) {
-        waitpid(pid, &stat, 0);
-        if (WIFEXITED(stat)) {
-            exit_code = WEXITSTATUS(stat);
-            break;
-        } else if (WIFSIGNALED(stat)) {
-            exit_code = -1;
+        pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0);
+        if (r == -1) {
+            if (errno == EINTR) continue;
+            fprintf(stderr, "waitpid: %s\n", strerror(errno));
             break;
         }
+        else if (r == pid) {
+            if (WIFEXITED(stat)) {
+                exit_code = WEXITSTATUS(stat);
+                break;
+            } else if (WIFSIGNALED(stat)) {
+                exit_code = -1;
+                break;
+            }
+        }
     }
+    rb_nativethread_lock_unlock(&vm->waitpid_lock);
     return exit_code;
 }
 
@@ -1474,6 +1489,7 @@ stop_worker(void)
         CRITICAL_SECTION_START(3, "in stop_worker");
         rb_native_cond_broadcast(&mjit_worker_wakeup);
         CRITICAL_SECTION_FINISH(3, "in stop_worker");
+        RUBY_VM_CHECK_INTS(GET_EC());
     }
 }
 
@@ -1509,6 +1525,21 @@ mjit_resume(void)
     return Qtrue;
 }
 
+static void *
+wait_pch(void *ignored)
+{
+    rb_native_cond_wait(&mjit_pch_wakeup, &mjit_engine_mutex);
+    return 0;
+}
+
+static void
+ubf_pch(void *ignored)
+{
+    rb_native_mutex_lock(&mjit_engine_mutex);
+    rb_native_cond_signal(&mjit_pch_wakeup);
+    rb_native_mutex_unlock(&mjit_engine_mutex);
+}
+
 /* Finish the threads processing units and creating PCH, finalize
    and free MJIT data.  It should be called last during MJIT
    life.  */
@@ -1528,7 +1559,8 @@ mjit_finish(void)
        absence.  So wait for a clean finish of the threads.  */
     while (pch_status == PCH_NOT_READY) {
         verbose(3, "Waiting wakeup from make_pch");
-        rb_native_cond_wait(&mjit_pch_wakeup, &mjit_engine_mutex);
+	/* release GVL to handle interrupts */
+        rb_thread_call_without_gvl(wait_pch, 0, ubf_pch, 0);
     }
     CRITICAL_SECTION_FINISH(3, "in mjit_finish to wakeup from pch");
 
diff --git a/process.c b/process.c
index a60c0df16b..bdb92036b7 100644
--- a/process.c
+++ b/process.c
@@ -885,12 +885,6 @@ pst_wcoredump(VALUE st)
 #endif
 }
 
-struct waitpid_arg {
-    rb_pid_t pid;
-    int flags;
-    int *st;
-};
-
 static rb_pid_t
 do_waitpid(rb_pid_t pid, int *st, int flags)
 {
@@ -903,25 +897,155 @@ do_waitpid(rb_pid_t pid, int *st, int flags)
 #endif
 }
 
+struct waitpid_state {
+    struct list_node wnode;
+    rb_nativethread_cond_t cond;
+    rb_pid_t ret;
+    rb_pid_t pid;
+    int status;
+    int options;
+    int errnum;
+    rb_vm_t *vm;
+};
+
+void rb_native_cond_signal(rb_nativethread_cond_t *);
+void rb_native_cond_wait(rb_nativethread_cond_t *, rb_nativethread_lock_t *);
+void rb_native_cond_initialize(rb_nativethread_cond_t *);
+void rb_native_cond_destroy(rb_nativethread_cond_t *);
+
+/* only called by vm->main_thread */
+void
+rb_sigchld(rb_vm_t *vm)
+{
+    struct waitpid_state *w = 0, *next;
+
+    rb_nativethread_lock_lock(&vm->waitpid_lock);
+    list_for_each_safe(&vm->waiting_pids, w, next, wnode) {
+        w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+        if (w->ret == 0) continue;
+        if (w->ret == -1) w->errnum = errno;
+        list_del_init(&w->wnode);
+        rb_native_cond_signal(&w->cond);
+    }
+    rb_nativethread_lock_unlock(&vm->waitpid_lock);
+}
+
+static void
+waitpid_state_init(struct waitpid_state *w, rb_vm_t *vm, pid_t pid, int options)
+{
+    rb_native_cond_initialize(&w->cond);
+    w->ret = 0;
+    w->pid = pid;
+    w->status = 0;
+    w->options = options;
+    w->vm = vm;
+    list_node_init(&w->wnode);
+}
+
+/* must be called with vm->waitpid_lock held, this is not interruptible */
+pid_t
+ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options)
+{
+    struct waitpid_state w;
+
+    assert(!ruby_thread_has_gvl_p() && "must not have GVL");
+
+    waitpid_state_init(&w, vm, pid, options);
+    w.ret = do_waitpid(w.pid, &w.status, w.options | WNOHANG);
+    if (w.ret) {
+        if (w.ret == -1) {
+            w.errnum = errno;
+        }
+    }
+    else {
+        list_add(&vm->waiting_pids, &w.wnode);
+        while (!w.ret) {
+            rb_native_cond_wait(&w.cond, &vm->waitpid_lock);
+        }
+        list_del(&w.wnode);
+    }
+    if (status) {
+        *status = w.status;
+    }
+    rb_native_cond_destroy(&w.cond);
+    errno = w.errnum;
+    return w.ret;
+}
+
+static void
+waitpid_ubf(void *x)
+{
+    struct waitpid_state *w = x;
+    rb_nativethread_lock_lock(&w->vm->waitpid_lock);
+    if (!w->ret) {
+        w->errnum = EINTR;
+        w->ret = -1;
+    }
+    rb_native_cond_signal(&w->cond);
+    rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
+}
+
 static void *
-rb_waitpid_blocking(void *data)
+waitpid_nogvl(void *x)
 {
-    struct waitpid_arg *arg = data;
-    rb_pid_t result = do_waitpid(arg->pid, arg->st, arg->flags);
-    return (void *)(VALUE)result;
+    struct waitpid_state *w = x;
+
+    /* let rb_sigchld handle it */
+    rb_native_cond_wait(&w->cond, &w->vm->waitpid_lock);
+
+    return 0;
 }
 
-static rb_pid_t
-do_waitpid_nonblocking(rb_pid_t pid, int *st, int flags)
+static VALUE
+waitpid_wait(VALUE x)
+{
+    struct waitpid_state *w = (struct waitpid_state *)x;
+
+    rb_nativethread_lock_lock(&w->vm->waitpid_lock);
+    w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+    if (w->ret) {
+        if (w->ret == -1) {
+            w->errnum = errno;
+        }
+    }
+    else {
+        rb_execution_context_t *ec = GET_EC();
+
+        list_add(&w->vm->waiting_pids, &w->wnode);
+        do {
+            rb_thread_call_without_gvl2(waitpid_nogvl, w, waitpid_ubf, w);
+            if (RUBY_VM_INTERRUPTED_ANY(ec) ||
+                    (w->ret == -1 && w->errnum == EINTR)) {
+                rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
+
+                RUBY_VM_CHECK_INTS(ec);
+
+                rb_nativethread_lock_lock(&w->vm->waitpid_lock);
+                if (w->ret == -1 && w->errnum == EINTR) {
+                    w->ret = do_waitpid(w->pid, &w->status, w->options|WNOHANG);
+                    if (w->ret == -1)
+                        w->errnum = errno;
+                }
+            }
+        } while (!w->ret);
+    }
+    rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
+    return Qfalse;
+}
+
+static VALUE
+waitpid_ensure(VALUE x)
 {
-    void *result;
-    struct waitpid_arg arg;
-    arg.pid = pid;
-    arg.st = st;
-    arg.flags = flags;
-    result = rb_thread_call_without_gvl(rb_waitpid_blocking, &arg,
-					RUBY_UBF_PROCESS, 0);
-    return (rb_pid_t)(VALUE)result;
+    struct waitpid_state *w = (struct waitpid_state *)x;
+
+    if (w->ret <= 0) {
+        rb_nativethread_lock_lock(&w->vm->waitpid_lock);
+        list_del_init(&w->wnode);
+        rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
+    }
+
+    rb_native_cond_destroy(&w->cond);
+    return Qfalse;
 }
 
 rb_pid_t
@@ -933,10 +1057,14 @@ rb_waitpid(rb_pid_t pid, int *st, int flags)
 	result = do_waitpid(pid, st, flags);
     }
     else {
-	while ((result = do_waitpid_nonblocking(pid, st, flags)) < 0 &&
-	       (errno == EINTR)) {
-	    RUBY_VM_CHECK_INTS(GET_EC());
-	}
+        struct waitpid_state w;
+
+        waitpid_state_init(&w, GET_VM(), pid, flags);
+        rb_ensure(waitpid_wait, (VALUE)&w, waitpid_ensure, (VALUE)&w);
+        if (st) {
+            *st = w.status;
+        }
+        result = w.ret;
     }
     if (result > 0) {
 	rb_last_status_set(*st, result);
@@ -4081,16 +4209,6 @@ rb_f_system(int argc, VALUE *argv)
     VALUE execarg_obj;
     struct rb_execarg *eargp;
 
-#if defined(SIGCLD) && !defined(SIGCHLD)
-# define SIGCHLD SIGCLD
-#endif
-
-#ifdef SIGCHLD
-    RETSIGTYPE (*chfunc)(int);
-
-    rb_last_status_clear();
-    chfunc = signal(SIGCHLD, SIG_DFL);
-#endif
     execarg_obj = rb_execarg_new(argc, argv, TRUE, TRUE);
     pid = rb_execarg_spawn(execarg_obj, NULL, 0);
 #if defined(HAVE_WORKING_FORK) || defined(HAVE_SPAWNV)
@@ -4100,9 +4218,6 @@ rb_f_system(int argc, VALUE *argv)
         if (ret == (rb_pid_t)-1)
             rb_sys_fail("Another thread waited the process started by system().");
     }
-#endif
-#ifdef SIGCHLD
-    signal(SIGCHLD, chfunc);
 #endif
     TypedData_Get_Struct(execarg_obj, struct rb_execarg, &exec_arg_data_type, eargp);
     if (pid < 0) {
diff --git a/signal.c b/signal.c
index c781c38c62..c20b01ea36 100644
--- a/signal.c
+++ b/signal.c
@@ -1052,6 +1052,19 @@ rb_trap_exit(void)
     }
 }
 
+static int
+sig_is_chld(int sig)
+{
+#if defined(SIGCLD)
+    return (sig == SIGCLD);
+#elif defined(SIGCHLD)
+    return (sig == SIGCHLD);
+#endif
+    return 0;
+}
+
+void rb_sigchld(rb_vm_t *); /* process.c */
+
 void
 rb_signal_exec(rb_thread_t *th, int sig)
 {
@@ -1059,6 +1072,9 @@ rb_signal_exec(rb_thread_t *th, int sig)
     VALUE cmd = vm->trap_list.cmd[sig];
     int safe = vm->trap_list.safe[sig];
 
+    if (sig_is_chld(sig)) {
+	rb_sigchld(vm);
+    }
     if (cmd == 0) {
 	switch (sig) {
 	  case SIGINT:
@@ -1117,6 +1133,11 @@ default_handler(int sig)
 #endif
 #ifdef SIGUSR2
       case SIGUSR2:
+#endif
+#ifdef SIGCLD
+      case SIGCLD:
+#elif defined(SIGCHLD)
+      case SIGCHLD:
 #endif
         func = sighandler;
         break;
@@ -1155,6 +1176,9 @@ trap_handler(VALUE *cmd, int sig)
     VALUE command;
 
     if (NIL_P(*cmd)) {
+	if (sig_is_chld(sig)) {
+	    goto sig_dfl;
+	}
 	func = SIG_IGN;
     }
     else {
@@ -1175,6 +1199,9 @@ trap_handler(VALUE *cmd, int sig)
 		break;
               case 14:
 		if (memcmp(cptr, "SYSTEM_DEFAULT", 14) == 0) {
+		    if (sig_is_chld(sig)) {
+			goto sig_dfl;
+		    }
                     func = SIG_DFL;
                     *cmd = 0;
 		}
@@ -1182,6 +1209,9 @@ trap_handler(VALUE *cmd, int sig)
 	      case 7:
 		if (memcmp(cptr, "SIG_IGN", 7) == 0) {
 sig_ign:
+		    if (sig_is_chld(sig)) {
+			goto sig_dfl;
+		    }
                     func = SIG_IGN;
                     *cmd = Qtrue;
 		}
@@ -1418,15 +1448,13 @@ static int
 init_sigchld(int sig)
 {
     sighandler_t oldfunc;
+    sighandler_t func = sighandler;
 
     oldfunc = ruby_signal(sig, SIG_DFL);
     if (oldfunc == SIG_ERR) return -1;
-    if (oldfunc != SIG_DFL && oldfunc != SIG_IGN) {
-	ruby_signal(sig, oldfunc);
-    }
-    else {
-	GET_VM()->trap_list.cmd[sig] = 0;
-    }
+    ruby_signal(sig, func);
+    GET_VM()->trap_list.cmd[sig] = 0;
+
     return 0;
 }
 
diff --git a/thread.c b/thread.c
index 5a5e32379d..8c9aafe07a 100644
--- a/thread.c
+++ b/thread.c
@@ -413,6 +413,7 @@ rb_vm_gvl_destroy(rb_vm_t *vm)
     gvl_release(vm);
     gvl_destroy(vm);
     rb_native_mutex_destroy(&vm->thread_destruct_lock);
+    rb_native_mutex_destroy(&vm->waitpid_lock);
 }
 
 void
@@ -4999,6 +5000,7 @@ Init_Thread(void)
 	    gvl_init(th->vm);
 	    gvl_acquire(th->vm, th);
             rb_native_mutex_initialize(&th->vm->thread_destruct_lock);
+            rb_native_mutex_initialize(&th->vm->waitpid_lock);
             rb_native_mutex_initialize(&th->interrupt_lock);
 
 	    th->pending_interrupt_queue = rb_ary_tmp_new(0);
diff --git a/vm_core.h b/vm_core.h
index ee151195d5..d0689e5ba6 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -553,6 +553,8 @@ typedef struct rb_vm_struct {
 #endif
 
     rb_serial_t fork_gen;
+    rb_nativethread_lock_t waitpid_lock;
+    struct list_head waiting_pids; /* <=> struct waitpid_state */
     struct list_head waiting_fds; /* <=> struct waiting_fd */
     struct list_head living_threads;
     VALUE thgroup_default;
@@ -1561,6 +1563,7 @@ static inline void
 rb_vm_living_threads_init(rb_vm_t *vm)
 {
     list_head_init(&vm->waiting_fds);
+    list_head_init(&vm->waiting_pids);
     list_head_init(&vm->living_threads);
     vm->living_thread_num = 0;
 }
-- 
EW

