From: Alexander Aring <aahringo@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH dlm-next 12/13] fs: dlm: create midcomms nodes when configure
Date: Thu, 27 Jul 2023 11:24:27 -0400 [thread overview]
Message-ID: <CAK-6q+iFEp-uieJ5XoN3nzwJvBbcE3MeNyGRJx6U9D5PP7OULg@mail.gmail.com> (raw)
In-Reply-To: <20230727132303.3352514-12-aahringo@redhat.com>
Hi,
On Thu, Jul 27, 2023 at 9:23?AM Alexander Aring <aahringo@redhat.com> wrote:
>
> This patch puts the life of a midcomms node the same as a lowcomms
> connection. The lowcomms connection lifetime was changed by commit
> 6f0b0b5d7ae7 ("fs: dlm: remove dlm_node_addrs lookup list"). In the
> future the midcomms node instances can be merged with lowcomms
> connection structure as the lifetime is the same and states can be
> controlled over values or flags.
>
> Before midcomms nodes were generated during version detection. This is
> not necessary anymore when the nodes are created when the cluster
> manager configures DLM via configfs. When a midcomms node is created over
> configfs it well set DLM_VERSION_NOT_SET as version. This indicates that
> the version of the midcomms node is still unknown and need to be probed
> via certain rcom messages.
>
> Signed-off-by: Alexander Aring <aahringo@redhat.com>
> ---
> fs/dlm/config.c | 2 +-
> fs/dlm/midcomms.c | 286 +++++++++++++++++-----------------------------
> fs/dlm/midcomms.h | 1 +
> 3 files changed, 110 insertions(+), 179 deletions(-)
>
> diff --git a/fs/dlm/config.c b/fs/dlm/config.c
> index 2beceff024e3..e55e0a2cd2e8 100644
> --- a/fs/dlm/config.c
> +++ b/fs/dlm/config.c
> @@ -664,7 +664,7 @@ static ssize_t comm_addr_store(struct config_item *item, const char *buf,
>
> memcpy(addr, buf, len);
>
> - rv = dlm_lowcomms_addr(cm->nodeid, addr, len);
> + rv = dlm_midcomms_addr(cm->nodeid, addr, len);
> if (rv) {
> kfree(addr);
> return rv;
> diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
> index c125496e03bf..24952ecf8e1a 100644
> --- a/fs/dlm/midcomms.c
> +++ b/fs/dlm/midcomms.c
> @@ -330,18 +330,23 @@ static void midcomms_node_reset(struct midcomms_node *node)
> wake_up(&node->shutdown_wait);
> }
>
> -static struct midcomms_node *nodeid2node(int nodeid, gfp_t alloc)
> +static struct midcomms_node *nodeid2node(int nodeid)
> {
> - struct midcomms_node *node, *tmp;
> - int r = nodeid_hash(nodeid);
> + return __find_node(nodeid, nodeid_hash(nodeid));
> +}
> +
> +int dlm_midcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
> +{
> + int ret, r = nodeid_hash(nodeid);
> + struct midcomms_node *node;
>
> - node = __find_node(nodeid, r);
> - if (node || !alloc)
> - return node;
> + ret = dlm_lowcomms_addr(nodeid, addr, len);
> + if (ret)
> + return ret;
>
> - node = kmalloc(sizeof(*node), alloc);
> + node = kmalloc(sizeof(*node), GFP_NOFS);
> if (!node)
> - return NULL;
> + return -ENOMEM;
>
> node->nodeid = nodeid;
> spin_lock_init(&node->state_lock);
> @@ -353,21 +358,11 @@ static struct midcomms_node *nodeid2node(int nodeid, gfp_t alloc)
> midcomms_node_reset(node);
>
> spin_lock(&nodes_lock);
> - /* check again if there was somebody else
> - * earlier here to add the node
> - */
> - tmp = __find_node(nodeid, r);
> - if (tmp) {
> - spin_unlock(&nodes_lock);
> - kfree(node);
> - return tmp;
> - }
> -
> hlist_add_head_rcu(&node->hlist, &node_hash[r]);
> spin_unlock(&nodes_lock);
>
> node->debugfs = dlm_create_debug_comms_file(nodeid, node);
> - return node;
> + return 0;
> }
>
> static int dlm_send_ack(int nodeid, uint32_t seq)
> @@ -603,112 +598,6 @@ static void dlm_midcomms_receive_buffer(const union dlm_packet *p,
> }
> }
>
> -static struct midcomms_node *
> -dlm_midcomms_recv_node_lookup(int nodeid, const union dlm_packet *p,
> - uint16_t msglen, int (*cb)(struct midcomms_node *node))
> -{
> - struct midcomms_node *node = NULL;
> - gfp_t allocation = 0;
> - int ret;
> -
> - switch (p->header.h_cmd) {
> - case DLM_RCOM:
> - if (msglen < sizeof(struct dlm_rcom)) {
> - log_print("rcom msg too small: %u, will skip this message from node %d",
> - msglen, nodeid);
> - return NULL;
> - }
> -
> - switch (p->rcom.rc_type) {
> - case cpu_to_le32(DLM_RCOM_NAMES):
> - fallthrough;
> - case cpu_to_le32(DLM_RCOM_NAMES_REPLY):
> - fallthrough;
> - case cpu_to_le32(DLM_RCOM_STATUS):
> - fallthrough;
> - case cpu_to_le32(DLM_RCOM_STATUS_REPLY):
> - node = nodeid2node(nodeid, 0);
> - if (node) {
> - spin_lock(&node->state_lock);
> - if (node->state != DLM_ESTABLISHED)
> - pr_debug("receive begin RCOM msg from node %d with state %s\n",
> - node->nodeid, dlm_state_str(node->state));
> -
> - switch (node->state) {
> - case DLM_CLOSED:
> - node->state = DLM_ESTABLISHED;
> - pr_debug("switch node %d to state %s\n",
> - node->nodeid, dlm_state_str(node->state));
> - break;
> - case DLM_ESTABLISHED:
> - break;
> - default:
> - spin_unlock(&node->state_lock);
> - return NULL;
> - }
> - spin_unlock(&node->state_lock);
> - }
> -
> - allocation = GFP_NOFS;
> - break;
> - default:
> - break;
> - }
> -
> - break;
> - default:
> - break;
> - }
> -
> - node = nodeid2node(nodeid, allocation);
> - if (!node) {
> - switch (p->header.h_cmd) {
> - case DLM_OPTS:
> - if (msglen < sizeof(struct dlm_opts)) {
> - log_print("opts msg too small: %u, will skip this message from node %d",
> - msglen, nodeid);
> - return NULL;
> - }
> -
> - log_print_ratelimited("received dlm opts message nextcmd %d from node %d in an invalid sequence",
> - p->opts.o_nextcmd, nodeid);
> - break;
> - default:
> - log_print_ratelimited("received dlm message cmd %d from node %d in an invalid sequence",
> - p->header.h_cmd, nodeid);
> - break;
> - }
> -
> - return NULL;
> - }
> -
> - ret = cb(node);
> - if (ret < 0)
> - return NULL;
> -
> - return node;
> -}
> -
> -static int dlm_midcomms_version_check_3_2(struct midcomms_node *node)
> -{
> - switch (node->version) {
> - case DLM_VERSION_NOT_SET:
> - node->version = DLM_VERSION_3_2;
> - wake_up(&node->shutdown_wait);
> - log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2,
> - node->nodeid);
> - break;
> - case DLM_VERSION_3_2:
> - break;
> - default:
> - log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x",
> - DLM_VERSION_3_2, node->nodeid, node->version);
> - return -1;
> - }
> -
> - return 0;
> -}
> -
> static int dlm_opts_check_msglen(const union dlm_packet *p, uint16_t msglen,
> int nodeid)
> {
> @@ -767,10 +656,37 @@ static void dlm_midcomms_receive_buffer_3_2(const union dlm_packet *p, int nodei
> int ret, idx;
>
> idx = srcu_read_lock(&nodes_srcu);
> - node = dlm_midcomms_recv_node_lookup(nodeid, p, msglen,
> - dlm_midcomms_version_check_3_2);
> - if (!node)
> + node = nodeid2node(nodeid);
> + if (WARN_ON_ONCE(!node))
> + goto out;
> +
> + switch (node->version) {
> + case DLM_VERSION_NOT_SET:
> + node->version = DLM_VERSION_3_2;
> + wake_up(&node->shutdown_wait);
> + log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2,
> + node->nodeid);
> +
> + spin_lock(&node->state_lock);
> + switch (node->state) {
> + case DLM_CLOSED:
> + node->state = DLM_ESTABLISHED;
> + pr_debug("switch node %d to state %s\n",
> + node->nodeid, dlm_state_str(node->state));
> + break;
> + default:
> + break;
> + }
> + spin_unlock(&node->state_lock);
> +
> + break;
> + case DLM_VERSION_3_2:
> + break;
> + default:
> + log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x",
> + DLM_VERSION_3_2, node->nodeid, node->version);
> goto out;
> + }
>
> switch (p->header.h_cmd) {
> case DLM_RCOM:
> @@ -860,8 +776,19 @@ static void dlm_midcomms_receive_buffer_3_2(const union dlm_packet *p, int nodei
> srcu_read_unlock(&nodes_srcu, idx);
> }
>
> -static int dlm_midcomms_version_check_3_1(struct midcomms_node *node)
> +static void dlm_midcomms_receive_buffer_3_1(const union dlm_packet *p, int nodeid)
> {
> + uint16_t msglen = le16_to_cpu(p->header.h_length);
> + struct midcomms_node *node;
> + int idx;
> +
> + idx = srcu_read_lock(&nodes_srcu);
> + node = nodeid2node(nodeid);
> + if (WARN_ON_ONCE(!node)) {
> + srcu_read_unlock(&nodes_srcu, idx);
> + return;
> + }
> +
> switch (node->version) {
> case DLM_VERSION_NOT_SET:
> node->version = DLM_VERSION_3_1;
> @@ -874,22 +801,6 @@ static int dlm_midcomms_version_check_3_1(struct midcomms_node *node)
> default:
> log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x",
> DLM_VERSION_3_1, node->nodeid, node->version);
> - return -1;
> - }
> -
> - return 0;
> -}
> -
> -static void dlm_midcomms_receive_buffer_3_1(const union dlm_packet *p, int nodeid)
> -{
> - uint16_t msglen = le16_to_cpu(p->header.h_length);
> - struct midcomms_node *node;
> - int idx;
> -
> - idx = srcu_read_lock(&nodes_srcu);
> - node = dlm_midcomms_recv_node_lookup(nodeid, p, msglen,
> - dlm_midcomms_version_check_3_1);
> - if (!node) {
> srcu_read_unlock(&nodes_srcu, idx);
> return;
> }
> @@ -1005,8 +916,8 @@ void dlm_midcomms_unack_msg_resend(int nodeid)
> int idx, ret;
>
> idx = srcu_read_lock(&nodes_srcu);
> - node = nodeid2node(nodeid, 0);
> - if (!node) {
> + node = nodeid2node(nodeid);
> + if (WARN_ON_ONCE(!node)) {
> srcu_read_unlock(&nodes_srcu, idx);
> return;
> }
> @@ -1092,11 +1003,9 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
> int idx;
>
> idx = srcu_read_lock(&nodes_srcu);
> - node = nodeid2node(nodeid, 0);
> - if (!node) {
> - WARN_ON_ONCE(1);
> + node = nodeid2node(nodeid);
> + if (WARN_ON_ONCE(!node))
> goto err;
> - }
>
> /* this is a bug, however we going on and hope it will be resolved */
> WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_TX, &node->flags));
> @@ -1237,8 +1146,34 @@ void dlm_midcomms_init(void)
> dlm_lowcomms_init();
> }
>
> +static void midcomms_node_release(struct rcu_head *rcu)
> +{
> + struct midcomms_node *node = container_of(rcu, struct midcomms_node, rcu);
> +
> + WARN_ON_ONCE(atomic_read(&node->send_queue_cnt));
> + dlm_send_queue_flush(node);
> + kfree(node);
> +}
> +
> void dlm_midcomms_exit(void)
> {
> + struct midcomms_node *node;
> + int i, idx;
> +
> + idx = srcu_read_lock(&nodes_srcu);
> + for (i = 0; i < CONN_HASH_SIZE; i++) {
> + hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
> + dlm_delete_debug_comms_file(node->debugfs);
> +
> + spin_lock(&nodes_lock);
> + hlist_del_rcu(&node->hlist);
> + spin_unlock(&nodes_lock);
> +
> + call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release);
> + }
> + }
> + srcu_read_unlock(&nodes_srcu, idx);
> +
> dlm_lowcomms_exit();
> }
>
> @@ -1279,8 +1214,8 @@ void dlm_midcomms_add_member(int nodeid)
> int idx;
>
> idx = srcu_read_lock(&nodes_srcu);
> - node = nodeid2node(nodeid, GFP_NOFS);
> - if (!node) {
> + node = nodeid2node(nodeid);
> + if (WARN_ON_ONCE(!node)) {
> srcu_read_unlock(&nodes_srcu, idx);
> return;
> }
> @@ -1324,8 +1259,8 @@ void dlm_midcomms_remove_member(int nodeid)
> int idx;
>
> idx = srcu_read_lock(&nodes_srcu);
> - node = nodeid2node(nodeid, 0);
> - if (!node) {
> + node = nodeid2node(nodeid);
> + if (WARN_ON_ONCE(!node)) {
> srcu_read_unlock(&nodes_srcu, idx);
> return;
> }
> @@ -1369,15 +1304,6 @@ void dlm_midcomms_remove_member(int nodeid)
> srcu_read_unlock(&nodes_srcu, idx);
> }
>
> -static void midcomms_node_release(struct rcu_head *rcu)
> -{
> - struct midcomms_node *node = container_of(rcu, struct midcomms_node, rcu);
> -
> - WARN_ON_ONCE(atomic_read(&node->send_queue_cnt));
> - dlm_send_queue_flush(node);
> - kfree(node);
> -}
> -
> void dlm_midcomms_version_wait(void)
> {
> struct midcomms_node *node;
> @@ -1440,7 +1366,7 @@ static void midcomms_shutdown(struct midcomms_node *node)
> node->state == DLM_CLOSED ||
> test_bit(DLM_NODE_FLAG_CLOSE, &node->flags),
> DLM_SHUTDOWN_TIMEOUT);
> - if (!ret || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags))
> + if (!ret)
> pr_debug("active shutdown timed out for node %d with state %s\n",
> node->nodeid, dlm_state_str(node->state));
> else
> @@ -1458,14 +1384,6 @@ void dlm_midcomms_shutdown(void)
> for (i = 0; i < CONN_HASH_SIZE; i++) {
> hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
> midcomms_shutdown(node);
> -
> - dlm_delete_debug_comms_file(node->debugfs);
> -
> - spin_lock(&nodes_lock);
> - hlist_del_rcu(&node->hlist);
> - spin_unlock(&nodes_lock);
> -
> - call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release);
> }
> }
> srcu_read_unlock(&nodes_srcu, idx);
> @@ -1481,7 +1399,7 @@ int dlm_midcomms_close(int nodeid)
>
> idx = srcu_read_lock(&nodes_srcu);
> /* Abort pending close/remove operation */
> - node = nodeid2node(nodeid, 0);
> + node = nodeid2node(nodeid);
> if (node) {
> /* let shutdown waiters leave */
> set_bit(DLM_NODE_FLAG_CLOSE, &node->flags);
> @@ -1493,7 +1411,7 @@ int dlm_midcomms_close(int nodeid)
>
> mutex_lock(&close_lock);
> idx = srcu_read_lock(&nodes_srcu);
> - node = nodeid2node(nodeid, 0);
> + node = nodeid2node(nodeid);
> if (!node) {
> srcu_read_unlock(&nodes_srcu, idx);
> mutex_unlock(&close_lock);
> @@ -1501,9 +1419,21 @@ int dlm_midcomms_close(int nodeid)
> }
>
> ret = dlm_lowcomms_close(nodeid);
> - spin_lock(&node->state_lock);
> - midcomms_node_reset(node);
> - spin_unlock(&node->state_lock);
> + dlm_delete_debug_comms_file(node->debugfs);
> +
> + spin_lock(&nodes_lock);
> + hlist_del_rcu(&node->hlist);
> + spin_unlock(&nodes_lock);
> +
> + /* wait that all readers left until flush send queue */
> + synchronize_srcu(&nodes_srcu);
> +
This will obviously deadlock when holding the nodes_srcu read lock. I
have a small fix for it by moving srcu_read_unlock(&nodes_srcu, idx);
right after we removed the node from the hash.
- Alex
next prev parent reply other threads:[~2023-07-27 15:24 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-27 13:22 [Cluster-devel] [PATCH dlm-next 01/13] fs: dlm: add missing spin_unlock Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 02/13] fs: dlm: remove unused processed_nodes Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 03/13] fs: dlm: debugfs for queued callbacks Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 04/13] fs: dlm: check on plock ops when exit dlm Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 05/13] fs: dlm: add plock dev tracepoints Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 06/13] fs: dlm: remove clear_members_cb Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 07/13] fs: dlm: cleanup lock order Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 08/13] fs: dlm: get recovery sequence number as parameter Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 09/13] fs: dlm: drop rxbuf manipulation in dlm_copy_master_names Alexander Aring
2023-07-27 13:23 ` [Cluster-devel] [PATCH dlm-next 10/13] fs: dlm: drop rxbuf manipulation in dlm_recover_master_copy Alexander Aring
2023-07-27 13:23 ` [Cluster-devel] [PATCH dlm-next 11/13] fs: dlm: constify receive buffer Alexander Aring
2023-07-27 13:23 ` [Cluster-devel] [PATCH dlm-next 12/13] fs: dlm: create midcomms nodes when configure Alexander Aring
2023-07-27 15:24 ` Alexander Aring [this message]
2023-07-27 13:23 ` [Cluster-devel] [PATCH dlm-next 13/13] fs: dlm: don't use RCOM_NAMES for version detection Alexander Aring
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAK-6q+iFEp-uieJ5XoN3nzwJvBbcE3MeNyGRJx6U9D5PP7OULg@mail.gmail.com \
--to=aahringo@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).