From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <e@80x24.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net
X-Spam-Level: *
X-Spam-ASN: AS60068 185.152.64.0/23
X-Spam-Status: No, score=1.2 required=3.0 tests=AWL,BAYES_00,RCVD_IN_MSPIKE_BL,
	RCVD_IN_MSPIKE_ZBI,RCVD_IN_XBL,SPF_FAIL,SPF_HELO_FAIL shortcircuit=no
	autolearn=no autolearn_force=no version=3.4.0
Received: from 80x24.org (tor-exit-node.1.justaguy.be [185.152.65.180])
	by dcvr.yhbt.net (Postfix) with ESMTP id D4DEB1F42E
	for <spew@80x24.org>; Sun, 29 Apr 2018 03:50:16 +0000 (UTC)
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH 2/2] gc: rb_wait_for_single_fd performs GC if idle (Linux)
Date: Sun, 29 Apr 2018 03:50:07 +0000
Message-Id: <20180429035007.6499-3-e@80x24.org>
In-Reply-To: <20180429035007.6499-1-e@80x24.org>
References: <20180429035007.6499-1-e@80x24.org>
List-Id: <spew.80x24.org>

Before this patch, the entropy-dependent script below consistently
takes 95MB on my system.  Now, depending on the amount of entropy
available, it takes anywhere from 43MB to 75MB.  I'm using
/dev/urandom to simulate real-world network latency variations.
There is no improvement when using /dev/zero because the process
is never idle.

    require 'net/http'
    require 'digest/md5'
    Thread.abort_on_exception = true
    s = TCPServer.new('127.0.0.1', 0)
    len = 1024 * 1024 * 1024 # stream 1 gigabyte

    # server thread: stream random data to emulate a slow network peer
    th = Thread.new do
      c = s.accept
      c.readpartial(16384)
      c.write("HTTP/1.0 200 OK\r\nContent-Length: #{len}\r\n\r\n")
      IO.copy_stream('/dev/urandom', c, len)
      c.close
    end

    # client: read and checksum the response body
    addr = s.addr
    Net::HTTP.start(addr[3], addr[1]) do |http|
      http.request_get('/') do |res|
        dig = Digest::MD5.new
        res.read_body { |buf| dig.update(buf) }
        puts dig.hexdigest
      end
    end

The above script also depends on net/protocol using read_nonblock.
Ordinary IO objects need IO#nonblock=true to see any benefit,
because blocking reads never hit rb_wait_for_single_fd.
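For example, a plain socket can be switched to non-blocking mode so
its reads wait in rb_wait_for_single_fd.  A minimal sketch for
illustration only (not part of this patch; the host, port, and read
size are placeholders):

    require 'socket'
    require 'io/nonblock' # provides IO#nonblock=

    sock = TCPSocket.new('127.0.0.1', 8080)
    sock.nonblock = true # reads now go through rb_wait_for_single_fd
    sock.write("GET / HTTP/1.0\r\n\r\n")
    # IO#read retries on EAGAIN via rb_io_wait_readable, which calls
    # rb_wait_for_single_fd, giving the idle GC step a chance to run:
    buf = sock.read(16384)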
* gc.c (rb_gc_inprogress): new function
  (rb_gc_step): ditto
* internal.h: declare prototypes for new gc.c functions
* thread_pthread.c (gvl_contended_p): new function
* thread_win32.c (gvl_contended_p): ditto (dummy)
* thread.c (rb_wait_for_single_fd w/ ppoll): use new functions
  to perform GC while GVL is uncontended and GC is lazy sweeping
  or incremental marking
  [ruby-core:86265]
---
 gc.c             | 21 +++++++++++++++++++++
 internal.h       |  4 ++++
 thread.c         | 21 +++++++++++++++------
 thread_pthread.c |  6 ++++++
 thread_win32.c   |  6 ++++++
 5 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/gc.c b/gc.c
index b9c1305060..143ef2a2c5 100644
--- a/gc.c
+++ b/gc.c
@@ -6518,6 +6518,27 @@ gc_rest(rb_objspace_t *objspace)
     }
 }
 
+int
+rb_gc_inprogress(const rb_execution_context_t *ec)
+{
+    rb_objspace_t *objspace = rb_ec_vm_ptr(ec)->objspace;
+
+    /* TODO: should this also check is_incremental_marking() ? */
+    return is_lazy_sweeping(&objspace->eden_heap) ||
+           is_incremental_marking(objspace);
+}
+
+/* returns true if there is more work to do, false if not */
+int
+rb_gc_step(const rb_execution_context_t *ec)
+{
+    rb_objspace_t *objspace = rb_ec_vm_ptr(ec)->objspace;
+
+    gc_rest(objspace);
+
+    return rb_gc_inprogress(ec);
+}
+
 struct objspace_and_reason {
     rb_objspace_t *objspace;
     int reason;
diff --git a/internal.h b/internal.h
index 85370ec0d7..43043e6601 100644
--- a/internal.h
+++ b/internal.h
@@ -1290,6 +1290,10 @@ void rb_gc_writebarrier_remember(VALUE obj);
 void ruby_gc_set_params(int safe_level);
 void rb_copy_wb_protected_attribute(VALUE dest, VALUE obj);
 
+struct rb_execution_context_struct;
+int rb_gc_inprogress(const struct rb_execution_context_struct *);
+int rb_gc_step(const struct rb_execution_context_struct *);
+
 #if defined(HAVE_MALLOC_USABLE_SIZE) || defined(HAVE_MALLOC_SIZE) || defined(_WIN32)
 #define ruby_sized_xrealloc(ptr, new_size, old_size) ruby_xrealloc(ptr, new_size)
 #define ruby_sized_xrealloc2(ptr, new_count, element_size, old_count) ruby_xrealloc2(ptr, new_count, element_size)
diff --git a/thread.c b/thread.c
index 65844f5442..4725c809b8 100644
--- a/thread.c
+++ b/thread.c
@@ -3961,10 +3961,12 @@ ppoll(struct pollfd *fds, nfds_t nfds,
 int
 rb_wait_for_single_fd(int fd, int events, struct timeval *timeout)
 {
+    static const struct timespec zero;
     struct pollfd fds;
     int result = 0, lerrno;
     struct timespec ts, end, *tsp;
     rb_thread_t *th = GET_THREAD();
+    int do_gc = rb_gc_inprogress(th->ec);
 
     timeout_prepare(&tsp, &ts, &end, timeout);
     fds.fd = fd;
@@ -3973,13 +3975,20 @@ rb_wait_for_single_fd(int fd, int events, struct timeval *timeout)
     do {
         fds.revents = 0;
         lerrno = 0;
-        BLOCKING_REGION({
-            result = ppoll(&fds, 1, tsp, NULL);
-            if (result < 0) lerrno = errno;
-        }, ubf_select, th, FALSE);
-        RUBY_VM_CHECK_INTS_BLOCKING(th->ec);
-    } while (result < 0 && retryable(errno = lerrno) &&
+        if (!do_gc || gvl_contended_p(th->vm)) {
+            BLOCKING_REGION({
+                result = ppoll(&fds, 1, tsp, NULL);
+                if (result < 0) lerrno = errno;
+            }, ubf_select, th, FALSE);
+            RUBY_VM_CHECK_INTS_BLOCKING(th->ec);
+        }
+        else { /* no need to release GVL if nobody is waiting for it */
+            do_gc = rb_gc_step(th->ec);
+            result = ppoll(&fds, 1, &zero, NULL);
+            if (result < 0) lerrno = errno;
+        }
+    } while ((result == 0 || (result < 0 && retryable(errno = lerrno))) &&
             update_timespec(tsp, &end));
     if (result < 0) return -1;
diff --git a/thread_pthread.c b/thread_pthread.c
index 6337620e8a..fccac48a44 100644
--- a/thread_pthread.c
+++ b/thread_pthread.c
@@ -156,6 +156,12 @@ gvl_yield(rb_vm_t *vm, rb_thread_t *th)
     rb_native_mutex_unlock(&vm->gvl.lock);
 }
 
+static int
+gvl_contended_p(const rb_vm_t *vm)
+{
+    return vm->gvl.waiting > 0;
+}
+
 static void
 gvl_init(rb_vm_t *vm)
 {
diff --git a/thread_win32.c b/thread_win32.c
index ab308905cb..3e3a62dd59 100644
--- a/thread_win32.c
+++ b/thread_win32.c
@@ -113,6 +113,12 @@ gvl_yield(rb_vm_t *vm, rb_thread_t *th)
     gvl_acquire(vm, th);
 }
 
+static int
+gvl_contended_p(const rb_vm_t *vm)
+{
+    return 1; /* TODO for win32 maintainer */
+}
+
 static void
 gvl_init(rb_vm_t *vm)
 {
-- 
EW