hail-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pete Zaitcev <zaitcev@redhat.com>
To: Jeff Garzik <jeff@garzik.org>
Cc: Project Hail List <hail-devel@vger.kernel.org>
Subject: [Patch 06/12] Chunk: do not call timer_add from event context
Date: Sat, 17 Apr 2010 22:41:37 -0600	[thread overview]
Message-ID: <20100417224137.40171324@redhat.com> (raw)

No matter if timer or cld_timer is used, this was not valid.
Obviously, locking is missing, so only one thread can access a
certain tlist. But the actual hang was more interesting than a
race and crash. Suppose that we add the first timer. In that
case, main thread invokes poll() with no timeout. If we add
the timer from the event callback, nobody wakes up the poll,
so the newly-added timer is not checked for expiration and never
fires, causing the hang.

Note that if we look at the whole stack, the session failure event
is bounced through two pipes: one in ncld (loopback), and another
here. The pipe in ncld is not really needed for this, but we use
it for convenience. Seems weird and inefficient, but it's short
code and session events are not performance-critical.

P.S. There's also an important bugfix in this patch: the is_dead
should not be cleared if a retry timeout is scheduled.

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>

---
 server/cldu.c |   77 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 69 insertions(+), 8 deletions(-)

commit d30028b4c681e99f7934ca264a175548c827ff04
Author: Master <zaitcev@lembas.zaitcev.lan>
Date:   Sat Apr 17 19:46:35 2010 -0600

    Calling timer_add from event thread is invalid. Use a pipe instead.

diff --git a/server/cldu.c b/server/cldu.c
index 957bd81..fafcc3b 100644
--- a/server/cldu.c
+++ b/server/cldu.c
@@ -47,6 +47,7 @@ struct cld_session {
 	struct cld_host cldv[N_CLD];
 
 	struct cld_timer timer;
+	int event_pipe[2];
 
 	char *ffname;
 	struct ncld_fh *ffh;	/* keep open for lock */
@@ -95,9 +96,8 @@ static void cldu_saveargs(struct cld_session *sp, char *infopath,
 	sp->ploc = loc;
 }
 
-static void cldu_timer_event(struct cld_timer *timer)
+static void cldu_sess_proc(struct cld_session *cs)
 {
-	struct cld_session *cs = timer->userdata;
 	int newactive;
 
 	if (cs->is_dead) {
@@ -106,29 +106,58 @@ static void cldu_timer_event(struct cld_timer *timer)
 		if (debugging)
 			applog(LOG_DEBUG, "Reopening Chunk in %s", cs->ffname);
 
-		ncld_sess_close(cs->nsess);
-		cs->nsess = NULL;
+		if (cs->nsess) {
+			ncld_sess_close(cs->nsess);
+			cs->nsess = NULL;
+		}
 		cs->ffh = NULL;			/* closed automatically */
-		cs->is_dead = false;
 		newactive = cldu_nextactive(cs);
 		if (cldu_set_cldc(cs, newactive)) {
 			/* Oops, should not happen. Just loop, then... */
 			timer_add(&cs->timer, time(NULL) + 30);
 			return;
 		}
+		cs->is_dead = false;
+	} else {
+		/*
+		 * We want to see if this ever happens.
+		 * Probably harmless, but... let's print it.
+		 */
+		applog(LOG_WARNING, "Event on non-dead session");
 	}
 }
 
+static void cldu_timer_event(struct cld_timer *timer)
+{
+	struct cld_session *cs = timer->userdata;
+
+	cldu_sess_proc(cs);
+}
+
+static bool cldu_pipe_event(int fd, short events, void *userdata)
+{
+	struct cld_session *cs = userdata;
+	unsigned char cmd;
+	ssize_t rc;
+
+	rc = read(fd, &cmd, 1);
+	if (rc > 0)
+		cldu_sess_proc(cs);
+	else
+		applog(LOG_WARNING, "Stray CLD event pipe poll");
+	return true;
+}
+
 static void cldu_sess_event(void *priv, uint32_t what)
 {
 	struct cld_session *cs = priv;
+	unsigned char cmd;
 
 	if (what == CE_SESS_FAILED) {
 		/*
 		 * In ncld, we are not allowed to free the session structures
 		 * from an event (it's wages of all-conquering 100% reliable
-		 * ncld_close_sess), so we bounce that off to a thread. Which
-		 * we do not have, so we steal a timer for an erzatz thread.
+		 * ncld_close_sess), so we bounce that off to the main thread.
 		 */
 		if (cs->nsess) {
 			applog(LOG_ERR, "Session failed, sid " SIDFMT,
@@ -137,7 +166,10 @@ static void cldu_sess_event(void *priv, uint32_t what)
 			applog(LOG_ERR, "Session open failed");
 		}
 		cs->is_dead = true;
-		timer_add(&cs->timer, time(NULL) + 1);
+		cmd = 1;
+		if (write(cs->event_pipe[1], &cmd, 1) < 1) {
+			applog(LOG_ERR, "Pipe write failed: %d", errno);
+		}
 	} else {
 		if (cs)
 			applog(LOG_INFO, "cldc event 0x%x sid " SIDFMT,
@@ -438,6 +470,7 @@ int cld_begin(const char *thishost, uint32_t nid, char *infopath,
 	      struct geo *locp, void (*cb)(enum st_cld))
 {
 	static struct cld_session *cs = &ses;
+	struct server_poll *sp;
 
 	if (!nid)
 		return 0;
@@ -483,6 +516,24 @@ int cld_begin(const char *thishost, uint32_t nid, char *infopath,
 		g_list_free(host_list);
 	}
 
+	if (pipe(cs->event_pipe) < 0) {
+		applog(LOG_ERR, "Cannot open pipe: %s", strerror(errno));
+		goto err_pipe;
+	}
+
+	sp = calloc(1, sizeof(*sp));
+	if (!sp) {
+		applog(LOG_ERR, "No core");
+		goto err_sp;
+	}
+
+	sp->fd = cs->event_pipe[0];
+	sp->events = POLLIN;
+	sp->cb = cldu_pipe_event;
+	sp->userdata = cs;
+
+	g_hash_table_insert(chunkd_srv.fd_info, GINT_TO_POINTER(sp->fd), sp);
+
 	/*
 	 * FIXME: We should find next suitable host according to
 	 * the priority and weight (among those which are up).
@@ -497,6 +548,11 @@ int cld_begin(const char *thishost, uint32_t nid, char *infopath,
 	return 0;
 
 err_net:
+	g_hash_table_remove(chunkd_srv.fd_info, GINT_TO_POINTER(sp->fd));
+err_sp:
+	close(cs->event_pipe[0]);
+	close(cs->event_pipe[1]);
+err_pipe:
 err_addr:
 	return -1;
 }
@@ -547,6 +603,11 @@ void cld_end(void)
 		}
 	}
 
+	g_hash_table_remove(chunkd_srv.fd_info,
+			    GINT_TO_POINTER(cs->event_pipe[0]));
+	close(cs->event_pipe[0]);
+	close(cs->event_pipe[1]);
+
 	free(cs->ffname);
 	cs->ffname = NULL;
 }

                 reply	other threads:[~2010-04-18  4:41 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100417224137.40171324@redhat.com \
    --to=zaitcev@redhat.com \
    --cc=hail-devel@vger.kernel.org \
    --cc=jeff@garzik.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).