hail-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pete Zaitcev <zaitcev@redhat.com>
To: Jeff Garzik <jeff@garzik.org>
Cc: Project Hail List <hail-devel@vger.kernel.org>
Subject: [tabled patch 1/1] fix the selection of chunk
Date: Tue, 25 May 2010 21:30:41 -0600	[thread overview]
Message-ID: <20100525213041.53a1198e@redhat.com> (raw)

If a chunkserver goes down, tabled sometimes throws a phantom "object
not found". It happens because we keep hitting the same down node and
exhaust the retries. The existing code calls rand() every time and
hopes for the best, but this is too likely to end poorly.

The fix is to only randomize once before the retry loop, and then
cycle through all available nodes deterministically. The same fix
would apply even if we used a better technique to select an available
chunkserver than just random.

Also, we refactor the code just a little bit, so that the enormous
function object_get_body gets somewhat easier to follow.

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>

---
 server/object.c |   92 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 64 insertions(+), 28 deletions(-)

diff -urp -X dontdiff tabled-a/server/object.c tabled-m/server/object.c
--- tabled-a/server/object.c	2010-05-25 21:06:37.000000000 -0600
+++ tabled-m/server/object.c	2010-05-25 20:53:26.000000000 -0600
@@ -1059,6 +1059,49 @@ static void object_get_event(struct open
 	cli_write_run_compl();
 }
 
+static int object_node_count_up(struct db_obj_ent *obj)
+{
+	int n;
+	int i;
+	uint32_t nid;
+	struct storage_node *stnode;
+
+	n = 0;
+	for (i = 0; i < MAXWAY; i++) {
+		nid = GUINT32_FROM_LE(obj->d.a.nidv[i]);
+		if (nid) {
+			stnode = stor_node_by_nid(nid);
+			if (stnode) {
+				if (stnode->up)
+					n++;
+				stor_node_put(stnode);
+			}
+		}
+	}
+	return n;
+}
+
+static struct storage_node *object_node_select(int *nx, struct db_obj_ent *obj)
+{
+	int i;
+	uint32_t nid;
+	struct storage_node *stnode;
+
+	for (i = 0; i < MAXWAY; i++) {
+		nid = GUINT32_FROM_LE(obj->d.a.nidv[*nx]);
+		if (nid) {
+			stnode = stor_node_by_nid(nid);
+			if (stnode) {
+				if (stnode->up)
+					return stnode;
+				stor_node_put(stnode);
+			}
+		}
+		*nx = (*nx + 1) % MAXWAY;
+	}
+	return NULL;
+}
+
 static bool object_get_body(struct client *cli, const char *user,
 			    const char *bucket, const char *key, bool want_body)
 {
@@ -1159,48 +1202,37 @@ static bool object_get_body(struct clien
 }
 
 	cli->in_objid = GUINT64_FROM_LE(obj->d.a.oid);
+	cli->in_retry = object_node_count_up(obj) * 2;
 
-	n = 0;
-	for (i = 0; i < MAXWAY; i++ ) {
-		uint32_t nid;
-		nid = GUINT32_FROM_LE(obj->d.a.nidv[i]);
-		if (nid)
-			n++;
-	}
-	cli->in_retry = n * 2;
+	/*
+	 * Seed n outside of the retry loop because we definitely want to cycle,
+	 * else users see phantom unavailability of keys when nodes go down.
+	 */
+	n = rand() % MAXWAY;
 
  stnode_open_retry:
 	if (cli->in_retry == 0) {
-		applog(LOG_ERR, "No input nodes for oid %llX", cli->in_objid);
+		applog(LOG_ERR, "No ready nodes for oid %llX", cli->in_objid);
 		goto err_out_str;
 	}
 	--cli->in_retry;
 
-	stnode = NULL;
-	n = rand() % MAXWAY;
-	for (i = 0; i < MAXWAY; i++ ) {
-		uint32_t nid;
-		nid = GUINT32_FROM_LE(obj->d.a.nidv[n]);
-		if (nid) {
-			stnode = stor_node_by_nid(nid);
-			if (stnode) {
-				if (debugging)
-					applog(LOG_DEBUG,
-					       "Selected nid %u for oid %llX",
-					       nid, cli->in_objid);
-				stor_node_put(stnode);
-				break;
-			}
-		}
-		n = (n + 1) % MAXWAY;
+	stnode = object_node_select(&n, obj);
+	if (!stnode) {
+		applog(LOG_ERR, "No known nodes for oid %llX", cli->in_objid);
+		goto err_out_str;
 	}
-	if (!stnode)
-		goto stnode_open_retry;
+
+	if (debugging)
+		applog(LOG_DEBUG, "Selected nid %u for oid %llX",
+		       stnode->id, cli->in_objid);
 
 	rc = stor_open(&cli->in_ce, stnode, tabled_srv.evbase_main);
 	if (rc < 0) {
 		applog(LOG_WARNING, "Cannot open input chunk, nid %u (%d)",
 		       stnode->id, rc);
+		stor_node_put(stnode);
+		n = (n + 1) % MAXWAY;
 		goto stnode_open_retry;
 	}
 
@@ -1210,10 +1242,14 @@ static bool object_get_body(struct clien
 		applog(LOG_ERR, "Cannot start nid %u for oid %llX (%d)",
 		       stnode->id, (unsigned long long) cli->in_objid, rc);
 		stor_close(&cli->in_ce);
+		stor_node_put(stnode);
+		n = (n + 1) % MAXWAY;
 		goto stnode_open_retry;
 	}
 	cli->in_ce.cli = cli;
 
+	stor_node_put(stnode);
+
 	hdr = req_hdr(&cli->req, "if-unmodified-since");
 	if (hdr) {
 		time_t t;

             reply	other threads:[~2010-05-26  3:30 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-26  3:30 Pete Zaitcev [this message]
2010-05-26  3:46 ` [tabled patch 1/1] fix the selection of chunk Jeff Garzik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100525213041.53a1198e@redhat.com \
    --to=zaitcev@redhat.com \
    --cc=hail-devel@vger.kernel.org \
    --cc=jeff@garzik.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).