dumping ground for random patches and texts
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Cc: Eric Wong <normalperson@yhbt.net>
Subject: [PATCH 2/8] fix SIGCHLD hijacking race conditions
Date: Mon, 25 Jun 2018 23:50:45 +0000	[thread overview]
Message-ID: <20180625235051.66045-3-e@80x24.org> (raw)
In-Reply-To: <20180625235051.66045-1-e@80x24.org>

From: Eric Wong <normalperson@yhbt.net>

It's a bit tricky, and I introduced yet another sleep function
(rb_thread_sleep_interruptible) to make it work.

Original problem in my patch was holding a native mutex while
forking is dangerous, so I went to normal ruby sleep methods
(same as Mutex and autoload).

However, it's impossible to resume main thread from signal
handler because rb_threadptr_execute_interrupts always restores
th->status after trap handlers.  In other words, this doesn't work:

        main = Thread.current
        trap(:USR1) { main.run }
        Thread.new { sleep 0.1; Process.kill(:USR1, $$) }
        sleep

So I tried moving rb_sigchld/rb_waitpid_all into timer-thread;
but that's racy and not doable unless the Ruby thread holds
a native mutex while sleeping on a native condvar (w/o GVL).
And that brings us back to the original problem...
(Well, I could use futex :P)

Now, back to using the main thread to handle SIGCHLD...

It seems to be working...
---
 mjit.c    |  13 ++++--
 process.c | 156 +++++++++++++++++++++++++++++---------------------------------
 signal.c  |  66 +++++++++++---------------
 thread.c  |  11 +++++
 4 files changed, 120 insertions(+), 126 deletions(-)

diff --git a/mjit.c b/mjit.c
index 4aa1524a55..63cd35da21 100644
--- a/mjit.c
+++ b/mjit.c
@@ -120,7 +120,8 @@ extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lo
 extern int rb_thread_create_mjit_thread(void (*child_hook)(void), void (*worker_func)(void));
 
 
-pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options);
+pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options,
+                          rb_nativethread_cond_t *cond);
 
 #define RB_CONDATTR_CLOCK_MONOTONIC 1
 
@@ -408,6 +409,7 @@ exec_process(const char *path, char *const argv[])
     int stat, exit_code;
     pid_t pid;
     rb_vm_t *vm = GET_VM();
+    rb_nativethread_cond_t cond;
 
     rb_nativethread_lock_lock(&vm->waitpid_lock);
     pid = start_process(path, argv);
@@ -415,11 +417,13 @@ exec_process(const char *path, char *const argv[])
         rb_nativethread_lock_unlock(&vm->waitpid_lock);
         return -2;
     }
+    rb_native_cond_initialize(&cond);
     for (;;) {
-        pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0);
+        pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0, &cond);
         if (r == -1) {
-            if (errno == EINTR) continue;
+            if (errno == EINTR) continue; /* should never happen */
             fprintf(stderr, "waitpid: %s\n", strerror(errno));
+            exit_code = -2;
             break;
         }
         else if (r == pid) {
@@ -433,6 +437,7 @@ exec_process(const char *path, char *const argv[])
         }
     }
     rb_nativethread_lock_unlock(&vm->waitpid_lock);
+    rb_native_cond_destroy(&cond);
     return exit_code;
 }
 
@@ -1582,7 +1587,7 @@ mjit_finish(void)
        absence.  So wait for a clean finish of the threads.  */
     while (pch_status == PCH_NOT_READY) {
         verbose(3, "Waiting wakeup from make_pch");
-	/* release GVL to handle interrupts */
+        /* release GVL to handle interrupts */
         rb_thread_call_without_gvl(wait_pch, 0, ubf_pch, 0);
     }
     CRITICAL_SECTION_FINISH(3, "in mjit_finish to wakeup from pch");
diff --git a/process.c b/process.c
index c92351b83d..9f0995e194 100644
--- a/process.c
+++ b/process.c
@@ -899,153 +899,143 @@ do_waitpid(rb_pid_t pid, int *st, int flags)
 
 struct waitpid_state {
     struct list_node wnode;
-    rb_nativethread_cond_t cond;
+    union {
+        rb_nativethread_cond_t *cond; /* non-Ruby thread */
+        rb_execution_context_t *ec; /* normal Ruby execution context */
+    } wake;
     rb_pid_t ret;
     rb_pid_t pid;
     int status;
     int options;
     int errnum;
-    rb_vm_t *vm;
+    unsigned int is_ruby : 1;
 };
 
+void rb_native_mutex_lock(rb_nativethread_lock_t *);
+void rb_native_mutex_unlock(rb_nativethread_lock_t *);
 void rb_native_cond_signal(rb_nativethread_cond_t *);
 void rb_native_cond_wait(rb_nativethread_cond_t *, rb_nativethread_lock_t *);
-void rb_native_cond_initialize(rb_nativethread_cond_t *);
-void rb_native_cond_destroy(rb_nativethread_cond_t *);
 
-/* only called by vm->main_thread */
+/* called by vm->main_thread */
 void
-rb_sigchld(rb_vm_t *vm)
+rb_waitpid_all(rb_vm_t *vm)
 {
     struct waitpid_state *w = 0, *next;
 
-    rb_nativethread_lock_lock(&vm->waitpid_lock);
+    rb_native_mutex_lock(&vm->waitpid_lock);
     list_for_each_safe(&vm->waiting_pids, w, next, wnode) {
         w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+
         if (w->ret == 0) continue;
         if (w->ret == -1) w->errnum = errno;
+
         list_del_init(&w->wnode);
-        rb_native_cond_signal(&w->cond);
+        if (w->is_ruby) {
+            rb_thread_wakeup_alive(rb_ec_thread_ptr(w->wake.ec)->self);
+        }
+        else {
+            rb_native_cond_signal(w->wake.cond);
+        }
     }
-    rb_nativethread_lock_unlock(&vm->waitpid_lock);
+    rb_native_mutex_unlock(&vm->waitpid_lock);
 }
 
 static void
-waitpid_state_init(struct waitpid_state *w, rb_vm_t *vm, pid_t pid, int options)
+waitpid_state_init(struct waitpid_state *w, pid_t pid, int options)
 {
-    rb_native_cond_initialize(&w->cond);
     w->ret = 0;
     w->pid = pid;
-    w->status = 0;
     w->options = options;
-    w->vm = vm;
     list_node_init(&w->wnode);
 }
 
-/* must be called with vm->waitpid_lock held, this is not interruptible */
+/*
+ * must be called with vm->waitpid_lock held, this is not interruptible
+ */
 pid_t
-ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options)
+ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options,
+                    rb_nativethread_cond_t *cond)
 {
     struct waitpid_state w;
 
     assert(!ruby_thread_has_gvl_p() && "must not have GVL");
 
-    waitpid_state_init(&w, vm, pid, options);
+    waitpid_state_init(&w, pid, options);
+    w.is_ruby = 0;
     w.ret = do_waitpid(w.pid, &w.status, w.options | WNOHANG);
     if (w.ret) {
-        if (w.ret == -1) {
-            w.errnum = errno;
-        }
+        if (w.ret == -1) w.errnum = errno;
     }
     else {
+        w.wake.cond = cond;
         list_add(&vm->waiting_pids, &w.wnode);
-        while (!w.ret) {
-            rb_native_cond_wait(&w.cond, &vm->waitpid_lock);
-        }
+        do {
+            rb_native_cond_wait(w.wake.cond, &vm->waitpid_lock);
+        } while (!w.ret);
         list_del(&w.wnode);
     }
     if (status) {
         *status = w.status;
     }
-    rb_native_cond_destroy(&w.cond);
     errno = w.errnum;
     return w.ret;
 }
 
-static void
-waitpid_ubf(void *x)
-{
-    struct waitpid_state *w = x;
-    rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-    if (!w->ret) {
-        w->errnum = EINTR;
-        w->ret = -1;
-    }
-    rb_native_cond_signal(&w->cond);
-    rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-}
+void rb_thread_sleep_interruptible(struct timespec *ts); /* thread.c */
 
-static void *
-waitpid_nogvl(void *x)
+static VALUE
+waitpid_sleep(VALUE x)
 {
-    struct waitpid_state *w = x;
+    struct waitpid_state *w = (struct waitpid_state *)x;
 
-    /* let rb_sigchld handle it */
-    rb_native_cond_wait(&w->cond, &w->vm->waitpid_lock);
+    rb_thread_check_ints();
+    while (!w->ret) {
+        rb_thread_sleep_interruptible(0);
+        rb_thread_check_ints();
+    }
 
-    return 0;
+    return Qfalse;
 }
 
 static VALUE
-waitpid_wait(VALUE x)
+waitpid_ensure(VALUE x)
 {
     struct waitpid_state *w = (struct waitpid_state *)x;
 
-    rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-    w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
-    if (w->ret) {
-        if (w->ret == -1) {
-            w->errnum = errno;
-        }
-    }
-    else {
-        rb_execution_context_t *ec = GET_EC();
+    if (w->ret == 0) {
+        rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
 
-        list_add(&w->vm->waiting_pids, &w->wnode);
-        do {
-            rb_thread_call_without_gvl2(waitpid_nogvl, w, waitpid_ubf, w);
-            if (RUBY_VM_INTERRUPTED_ANY(ec) ||
-                    (w->ret == -1 && w->errnum == EINTR)) {
-                rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-
-                RUBY_VM_CHECK_INTS(ec);
-
-                rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-                if (w->ret == -1 && w->errnum == EINTR) {
-                    w->ret = do_waitpid(w->pid, &w->status, w->options|WNOHANG);
-                    if (w->ret == -1)
-                        w->errnum = errno;
-                }
-            }
-        } while (!w->ret);
+        rb_native_mutex_lock(&vm->waitpid_lock);
+        list_del(&w->wnode);
+        rb_native_mutex_unlock(&vm->waitpid_lock);
     }
-    rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
     return Qfalse;
 }
 
-static VALUE
-waitpid_ensure(VALUE x)
+static void
+waitpid_wait(struct waitpid_state *w)
 {
-    struct waitpid_state *w = (struct waitpid_state *)x;
+    rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
 
-    if (w->ret <= 0) {
-        rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-        list_del_init(&w->wnode);
-        rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
+    /*
+     * Lock here to prevent do_waitpid from stealing work from the
+     * ruby_waitpid_locked done by mjit workers since mjit works
+     * outside of GVL
+     */
+    rb_native_mutex_lock(&vm->waitpid_lock);
+
+    w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+    if (w->ret) {
+        if (w->ret == -1) w->errnum = errno;
+
+        rb_native_mutex_unlock(&vm->waitpid_lock);
     }
+    else {
+        list_add(&vm->waiting_pids, &w->wnode);
+        rb_native_mutex_unlock(&vm->waitpid_lock);
 
-    rb_native_cond_destroy(&w->cond);
-    return Qfalse;
+        rb_ensure(waitpid_sleep, (VALUE)w, waitpid_ensure, (VALUE)w);
+    }
 }
 
 rb_pid_t
@@ -1059,11 +1049,11 @@ rb_waitpid(rb_pid_t pid, int *st, int flags)
     else {
         struct waitpid_state w;
 
-        waitpid_state_init(&w, GET_VM(), pid, flags);
-        rb_ensure(waitpid_wait, (VALUE)&w, waitpid_ensure, (VALUE)&w);
-        if (st) {
-            *st = w.status;
-        }
+        waitpid_state_init(&w, pid, flags);
+        w.is_ruby = 1;
+        w.wake.ec = GET_EC();
+        waitpid_wait(&w);
+        if (st) *st = w.status;
         result = w.ret;
     }
     if (result > 0) {
diff --git a/signal.c b/signal.c
index c20b01ea36..22fc4286d9 100644
--- a/signal.c
+++ b/signal.c
@@ -66,6 +66,14 @@ ruby_atomic_compare_and_swap(rb_atomic_t *ptr, rb_atomic_t cmp,
 # define NSIG (_SIGMAX + 1)      /* For QNX */
 #endif
 
+#if defined(SIGCLD)
+#  define RUBY_SIGCHLD    (SIGCLD)
+#elif defined(SIGCHLD)
+#  define RUBY_SIGCHLD    (SIGCHLD)
+#else
+#  define RUBY_SIGCHLD    (0)
+#endif
+
 static const struct signals {
     const char *signm;
     int  signo;
@@ -129,15 +137,9 @@ static const struct signals {
 #ifdef SIGCONT
     {"CONT", SIGCONT},
 #endif
-#ifdef SIGCHLD
-    {"CHLD", SIGCHLD},
-#endif
-#ifdef SIGCLD
-    {"CLD", SIGCLD},
-#else
-# ifdef SIGCHLD
-    {"CLD", SIGCHLD},
-# endif
+#if RUBY_SIGCHLD
+    {"CHLD", RUBY_SIGCHLD },
+    {"CLD", RUBY_SIGCHLD },
 #endif
 #ifdef SIGTTIN
     {"TTIN", SIGTTIN},
@@ -1052,18 +1054,7 @@ rb_trap_exit(void)
     }
 }
 
-static int
-sig_is_chld(int sig)
-{
-#if defined(SIGCLD)
-    return (sig == SIGCLD);
-#elif defined(SIGCHLD)
-    return (sig == SIGCHLD);
-#endif
-    return 0;
-}
-
-void rb_sigchld(rb_vm_t *); /* process.c */
+void rb_waitpid_all(rb_vm_t *); /* process.c */
 
 void
 rb_signal_exec(rb_thread_t *th, int sig)
@@ -1072,9 +1063,10 @@ rb_signal_exec(rb_thread_t *th, int sig)
     VALUE cmd = vm->trap_list.cmd[sig];
     int safe = vm->trap_list.safe[sig];
 
-    if (sig_is_chld(sig)) {
-	rb_sigchld(vm);
+    if (sig == RUBY_SIGCHLD) {
+        rb_waitpid_all(vm);
     }
+
     if (cmd == 0) {
 	switch (sig) {
 	  case SIGINT:
@@ -1134,10 +1126,8 @@ default_handler(int sig)
 #ifdef SIGUSR2
       case SIGUSR2:
 #endif
-#ifdef SIGCLD
-      case SIGCLD:
-#elif defined(SIGCHLD)
-      case SIGCHLD:
+#if RUBY_SIGCHLD
+      case RUBY_SIGCHLD:
 #endif
         func = sighandler;
         break;
@@ -1176,7 +1166,7 @@ trap_handler(VALUE *cmd, int sig)
     VALUE command;
 
     if (NIL_P(*cmd)) {
-	if (sig_is_chld(sig)) {
+	if (sig == RUBY_SIGCHLD) {
 	    goto sig_dfl;
 	}
 	func = SIG_IGN;
@@ -1199,9 +1189,9 @@ trap_handler(VALUE *cmd, int sig)
 		break;
               case 14:
 		if (memcmp(cptr, "SYSTEM_DEFAULT", 14) == 0) {
-		    if (sig_is_chld(sig)) {
-			goto sig_dfl;
-		    }
+                    if (sig == RUBY_SIGCHLD) {
+                        goto sig_dfl;
+                    }
                     func = SIG_DFL;
                     *cmd = 0;
 		}
@@ -1209,9 +1199,9 @@ trap_handler(VALUE *cmd, int sig)
 	      case 7:
 		if (memcmp(cptr, "SIG_IGN", 7) == 0) {
 sig_ign:
-		    if (sig_is_chld(sig)) {
-			goto sig_dfl;
-		    }
+                    if (sig == RUBY_SIGCHLD) {
+                        goto sig_dfl;
+                    }
                     func = SIG_IGN;
                     *cmd = Qtrue;
 		}
@@ -1443,7 +1433,7 @@ install_sighandler(int signum, sighandler_t handler)
 #  define install_sighandler(signum, handler) \
     INSTALL_SIGHANDLER(install_sighandler(signum, handler), #signum, signum)
 
-#if defined(SIGCLD) || defined(SIGCHLD)
+#if RUBY_SIGCHLD
 static int
 init_sigchld(int sig)
 {
@@ -1570,10 +1560,8 @@ Init_signal(void)
     install_sighandler(SIGSYS, sig_do_nothing);
 #endif
 
-#if defined(SIGCLD)
-    init_sigchld(SIGCLD);
-#elif defined(SIGCHLD)
-    init_sigchld(SIGCHLD);
+#if RUBY_SIGCHLD
+    init_sigchld(RUBY_SIGCHLD);
 #endif
 
     rb_enable_interrupt();
diff --git a/thread.c b/thread.c
index 8c9aafe07a..ab27c60632 100644
--- a/thread.c
+++ b/thread.c
@@ -1287,6 +1287,17 @@ rb_thread_sleep_forever(void)
     sleep_forever(GET_THREAD(), SLEEP_SPURIOUS_CHECK);
 }
 
+void
+rb_thread_sleep_interruptible(struct timespec *ts)
+{
+    rb_thread_t *th = GET_THREAD();
+    enum rb_thread_status prev_status = th->status;
+
+    th->status = THREAD_STOPPED;
+    native_sleep(th, ts);
+    th->status = prev_status;
+}
+
 void
 rb_thread_sleep_deadly(void)
 {
-- 
EW


  parent reply	other threads:[~2018-06-25 23:50 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-25 23:50 [PATCHv3 0/8] mjit SIGCHLD hijack series Eric Wong
2018-06-25 23:50 ` [PATCH 1/8] hijack SIGCHLD handler for internal use Eric Wong
2018-06-25 23:50 ` Eric Wong [this message]
2018-06-25 23:50 ` [PATCH 3/8] mjit.c: allow working on platforms without SIGCHLD Eric Wong
2018-06-25 23:50 ` [PATCH 4/8] cleanups Eric Wong
2018-06-25 23:50 ` [PATCH 5/8] handle SIGCHLD in both the timer-thread and main thread Eric Wong
2018-06-25 23:50 ` [PATCH 6/8] Revert "test_process.rb: skip tests for Bug 14867" Eric Wong
2018-06-25 23:50 ` [PATCH 7/8] Revert "spec: skip Process wait specs on MJIT" Eric Wong
2018-06-25 23:50 ` [PATCH 8/8] wip testing Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180625235051.66045-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=normalperson@yhbt.net \
    --cc=spew@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).