dumping ground for random patches and texts
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [WIP v2 3/4] thread.c (do_select): perform GC if idle
Date: Tue,  1 May 2018 08:08:43 +0000	[thread overview]
Message-ID: <20180501080844.22751-4-e@80x24.org> (raw)
In-Reply-To: <20180501080844.22751-1-e@80x24.org>

Continuing on the GC-on-idle idea, the process may perform
useful GC work if we are waiting on the select(2) system call.

Typical use of IO.select on Linux systems is NOT after EAGAIN
(unlike rb_wait_for_single_fd-with-ppoll), so we have fewer
heuristics on whether or not the initial select(2) will succeed.
Hence, we make an initial select() call before attempting GC.

Also, explain the difference in ordering for the previous
ppoll patch.

Memory usage of the following script goes from 56MB to around
32MB, depending on entropy in the system.

  Thread.abort_on_exception = true
  len = 100_000_000
  rd, wr = IO.pipe
  readers = 10.times.map { rd.dup }
  th = Thread.new do
    IO.copy_stream('/dev/urandom', wr, len)
    wr.close
  end

  until readers.empty?
    ready = IO.select(readers)
    ready[0].each do |r|
      r.read(16384) or readers.clear
    end
  end
  th.join

v2 - account for select(2) timeval modifications
     Unlike the timeout for ppoll, the select(2) timeout is not
     "const" and may be modified.
---
 thread.c | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/thread.c b/thread.c
index 4625d082fd..36dbe568e3 100644
--- a/thread.c
+++ b/thread.c
@@ -3826,6 +3826,7 @@ do_select(int n, rb_fdset_t *const readfds, rb_fdset_t *const writefds,
     rb_fdset_t MAYBE_UNUSED(orig_except);
     struct timespec ts, end, *tsp;
     rb_thread_t *th = GET_THREAD();
+    int do_gc = 1; /* we call select() before rb_gc_step() anyways */
 
     timeout_prepare(&tsp, &ts, &end, timeout);
 #define do_select_update() \
@@ -3843,15 +3844,29 @@ do_select(int n, rb_fdset_t *const readfds, rb_fdset_t *const writefds,
 
     do {
 	lerrno = 0;
-
-	BLOCKING_REGION({
-	    result = native_fd_select(n, readfds, writefds, exceptfds,
-				      timeval_for(timeout, tsp), th);
-	    if (result < 0) lerrno = errno;
-	}, ubf_select, th, FALSE);
-
-	RUBY_VM_CHECK_INTS_BLOCKING(th->ec);
-    } while (result < 0 && retryable(errno = lerrno) && do_select_update());
+        if (!do_gc || gvl_contended_p(th->vm)) {
+            BLOCKING_REGION({
+                result = native_fd_select(n, readfds, writefds, exceptfds,
+                                          timeval_for(timeout, tsp), th);
+                if (result < 0) lerrno = errno;
+            }, ubf_select, th, FALSE);
+            RUBY_VM_CHECK_INTS_BLOCKING(th->ec);
+        }
+        else { /* no need to release GVL if nobody is waiting for it */
+            struct timeval zero = { 0, 0 }; /* select(2) may modify this */
+
+            /*
+             * IO.select callers sometimes do NOT hit EAGAIN before
+             * calling this function, so native_fd_select may succeed
+             * on the first try before we get to GC.
+             */
+            result = native_fd_select(n, readfds, writefds, exceptfds,
+                                      &zero, th);
+            if (result < 0) lerrno = errno;
+            if (result == 0) do_gc = rb_gc_step(th->ec);
+        }
+    } while ((result == 0 || (result < 0 && retryable(errno = lerrno))) &&
+             do_select_update());
 
 #define fd_term(f) if (f##fds) rb_fd_term(&orig_##f)
     fd_term(read);
@@ -3985,6 +4000,11 @@ rb_wait_for_single_fd(int fd, int events, struct timeval *timeout)
             RUBY_VM_CHECK_INTS_BLOCKING(th->ec);
         }
         else { /* no need to release GVL if nobody is waiting for it */
+            /*
+             * we typically enter this function on EAGAIN, so we have
+             * a low likelihood of ppoll succeeding right away, thus
+             * we do GC before initial ppoll
+             */
             do_gc = rb_gc_step(th->ec);
             result = ppoll(&fds, 1, &zero, NULL);
             if (result < 0) lerrno = errno;
-- 
EW


  parent reply	other threads:[~2018-05-01  8:08 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-01  8:08 [WIP v2 0/4] sleepy GC Eric Wong
2018-05-01  8:08 ` [WIP v2 1/4] thread.c (timeout_prepare): common function Eric Wong
2018-05-01  8:08 ` [WIP v2 2/4] gc: rb_wait_for_single_fd performs GC if idle (Linux) Eric Wong
2018-05-01  8:08 ` Eric Wong [this message]
2018-05-01  8:08 ` [WIP v2 4/4] thread.c: native_sleep callers may perform GC Eric Wong
2018-05-02  4:42   ` [PATCH 5/4] thread_sync.c (mutex_lock): add missing else Eric Wong
2018-05-02  4:52 ` [PATCH 6/4] gc.c: allow disabling sleepy GC Eric Wong
2018-05-02  4:57 ` [PATCH] benchmark: add benchmarks for " Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180501080844.22751-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=spew@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).