From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id C154D21848 for ; Tue, 1 May 2018 08:08:45 +0000 (UTC) From: Eric Wong To: spew@80x24.org Subject: [WIP v2 4/4] thread.c: native_sleep callers may perform GC Date: Tue, 1 May 2018 08:08:44 +0000 Message-Id: <20180501080844.22751-5-e@80x24.org> In-Reply-To: <20180501080844.22751-1-e@80x24.org> References: <20180501080844.22751-1-e@80x24.org> List-Id: It still makes sense to perform GC while using native_sleep for blocked Queue (or other operations with only "local" dependencies). This is because some GVL release operations can still leave other threads sleeping, as in the example below. The following goes from roughly 90MB to roughly 41MB depending on entropy: require 'thread' q = SizedQueue.new(128) nr = 100_000 th = Thread.new do File.open('/dev/urandom') do |rd| nr.times do q.push(rd.read(16384)) end end end nr.times { q.pop } th.join Unfortunately, this does not seem effective when the majority of garbage is generated AFTER entering native_sleep. Thus, I am unable to find improvement when using Thread#join without a timeout: nr = 100_000 th = Thread.new do File.open('/dev/urandom') do |rd| nr.times { rd.read(16384) } end end th.join So we may need to add heuristics for entering sleep for methods in thread.c and thread_sync.c and possibly continuing to schedule threads in THREAD_STOPPED_FOREVER state to enable them to perform cleanup. 
--- thread.c | 97 +++++++++++++++++++++++++++++++++++++-------------- thread_sync.c | 21 +++++++++-- 2 files changed, 88 insertions(+), 30 deletions(-) diff --git a/thread.c b/thread.c index 36dbe568e3..6aa3e8dc88 100644 --- a/thread.c +++ b/thread.c @@ -395,6 +395,14 @@ rb_thread_debug( } #endif +static void +thread_yield(rb_thread_t *th) +{ + RB_GC_SAVE_MACHINE_CONTEXT(th); + gvl_yield(th->vm, th); + rb_thread_set_current(th); +} + #include "thread_sync.c" void @@ -923,6 +931,7 @@ thread_join_sleep(VALUE arg) struct join_arg *p = (struct join_arg *)arg; rb_thread_t *target_th = p->target, *th = p->waiting; struct timespec end; + int do_gc = rb_gc_inprogress(th->ec); if (p->limit) { getclockofday(&end); @@ -932,10 +941,16 @@ thread_join_sleep(VALUE arg) while (target_th->status != THREAD_KILLED) { if (!p->limit) { th->status = THREAD_STOPPED_FOREVER; - th->vm->sleeper++; - rb_check_deadlock(th->vm); - native_sleep(th, 0); - th->vm->sleeper--; + if (do_gc && !gvl_contended_p(th->vm)) { + do_gc = rb_gc_step(th->ec); + thread_yield(th); /* let target_th do some work */ + } + else { + th->vm->sleeper++; + rb_check_deadlock(th->vm); + native_sleep(th, 0); + th->vm->sleeper--; + } } else { if (timespec_update_expire(p->limit, &end)) { @@ -944,13 +959,19 @@ thread_join_sleep(VALUE arg) return Qfalse; } th->status = THREAD_STOPPED; - native_sleep(th, p->limit); + if (do_gc && !gvl_contended_p(th->vm)) { + do_gc = rb_gc_step(th->ec); + thread_yield(th); /* let target_th do some work */ + } + else { + native_sleep(th, p->limit); + } } RUBY_VM_CHECK_INTS_BLOCKING(th->ec); th->status = THREAD_RUNNABLE; - thread_debug("thread_join: interrupted (thid: %"PRI_THREAD_ID", status: %s)\n", - thread_id_str(target_th), thread_status_name(target_th, TRUE)); } + thread_debug("thread_join: done (thid: %"PRI_THREAD_ID", status: %s)\n", + thread_id_str(target_th), thread_status_name(target_th, TRUE)); return Qtrue; } @@ -1149,21 +1170,33 @@ sleep_forever(rb_thread_t *th, int deadlockable, 
int spurious_check) { enum rb_thread_status prev_status = th->status; enum rb_thread_status status = deadlockable ? THREAD_STOPPED_FOREVER : THREAD_STOPPED; + int do_gc = rb_gc_inprogress(th->ec); th->status = status; RUBY_VM_CHECK_INTS_BLOCKING(th->ec); while (th->status == status) { - if (deadlockable) { - th->vm->sleeper++; - rb_check_deadlock(th->vm); - } - native_sleep(th, 0); - if (deadlockable) { - th->vm->sleeper--; - } - RUBY_VM_CHECK_INTS_BLOCKING(th->ec); - if (!spurious_check) - break; + if (do_gc && !gvl_contended_p(th->vm)) { + do_gc = rb_gc_step(th->ec); + thread_yield(th); /* let other threads send interrupts */ + if (!spurious_check && RUBY_VM_INTERRUPTED(th->ec)) { + RUBY_VM_CHECK_INTS_BLOCKING(th->ec); + break; + } + RUBY_VM_CHECK_INTS_BLOCKING(th->ec); + } + else { + if (deadlockable) { + th->vm->sleeper++; + rb_check_deadlock(th->vm); + } + native_sleep(th, 0); + if (deadlockable) { + th->vm->sleeper--; + } + RUBY_VM_CHECK_INTS_BLOCKING(th->ec); + if (!spurious_check) + break; + } } th->status = prev_status; } @@ -1252,18 +1285,30 @@ sleep_timespec(rb_thread_t *th, struct timespec ts, int spurious_check) { struct timespec end; enum rb_thread_status prev_status = th->status; + int do_gc = rb_gc_inprogress(th->ec); getclockofday(&end); timespec_add(&end, &ts); th->status = THREAD_STOPPED; RUBY_VM_CHECK_INTS_BLOCKING(th->ec); while (th->status == THREAD_STOPPED) { - native_sleep(th, &ts); - RUBY_VM_CHECK_INTS_BLOCKING(th->ec); - if (timespec_update_expire(&ts, &end)) - break; - if (!spurious_check) - break; + if (do_gc && !gvl_contended_p(th->vm)) { + do_gc = rb_gc_step(th->ec); + thread_yield(th); /* let other threads send interrupts */ + if (!spurious_check && RUBY_VM_INTERRUPTED(th->ec)) { + RUBY_VM_CHECK_INTS_BLOCKING(th->ec); + break; + } + RUBY_VM_CHECK_INTS_BLOCKING(th->ec); + } + else { + native_sleep(th, &ts); + RUBY_VM_CHECK_INTS_BLOCKING(th->ec); + if (!spurious_check) + break; + } + if (timespec_update_expire(&ts, &end)) + break; } 
th->status = prev_status; } @@ -1344,9 +1389,7 @@ rb_thread_schedule_limits(uint32_t limits_us) if (th->running_time_us >= limits_us) { thread_debug("rb_thread_schedule/switch start\n"); - RB_GC_SAVE_MACHINE_CONTEXT(th); - gvl_yield(th->vm, th); - rb_thread_set_current(th); + thread_yield(th); thread_debug("rb_thread_schedule/switch done\n"); } } diff --git a/thread_sync.c b/thread_sync.c index e7702f17d0..86a102151e 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -243,6 +243,7 @@ rb_mutex_lock(VALUE self) if (rb_mutex_trylock(self) == Qfalse) { struct sync_waiter w; + int do_gc = rb_gc_inprogress(th->ec); if (mutex->th == th) { rb_raise(rb_eThreadError, "deadlock; recursive locking"); @@ -270,7 +271,12 @@ rb_mutex_lock(VALUE self) } list_add_tail(&mutex->waitq, &w.node); - native_sleep(th, timeout); /* release GVL */ + if (do_gc) { + thread_yield(th); + } + else { + native_sleep(th, timeout); /* release GVL */ + } list_del(&w.node); if (!mutex->th) { mutex->th = th; @@ -288,8 +294,17 @@ rb_mutex_lock(VALUE self) } th->vm->sleeper--; - if (mutex->th == th) mutex_locked(th, self); - + if (mutex->th == th) { + mutex_locked(th, self); + } + if (do_gc) { + /* + * Likely no point in checking for GVL contention here + * this Mutex is already contended and we just yielded + * above. + */ + do_gc = rb_gc_step(th->ec); + } RUBY_VM_CHECK_INTS_BLOCKING(th->ec); } } -- EW