cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: Alexander Aring <aahringo@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH dlm-next 12/13] fs: dlm: create midcomms nodes when configure
Date: Thu, 27 Jul 2023 11:24:27 -0400	[thread overview]
Message-ID: <CAK-6q+iFEp-uieJ5XoN3nzwJvBbcE3MeNyGRJx6U9D5PP7OULg@mail.gmail.com> (raw)
In-Reply-To: <20230727132303.3352514-12-aahringo@redhat.com>

Hi,

On Thu, Jul 27, 2023 at 9:23?AM Alexander Aring <aahringo@redhat.com> wrote:
>
> This patch puts the life of a midcomms node the same as a lowcomms
> connection. The lowcomms connection lifetime was changed by commit
> 6f0b0b5d7ae7 ("fs: dlm: remove dlm_node_addrs lookup list"). In the
> future the midcomms node instances can be merged with lowcomms
> connection structure as the lifetime is the same and states can be
> controlled over values or flags.
>
> Before midcomms nodes were generated during version detection. This is
> not necessary anymore when the nodes are created when the cluster
> manager configures DLM via configfs. When a midcomms node is created over
> configfs it well set DLM_VERSION_NOT_SET as version. This indicates that
> the version of the midcomms node is still unknown and need to be probed
> via certain rcom messages.
>
> Signed-off-by: Alexander Aring <aahringo@redhat.com>
> ---
>  fs/dlm/config.c   |   2 +-
>  fs/dlm/midcomms.c | 286 +++++++++++++++++-----------------------------
>  fs/dlm/midcomms.h |   1 +
>  3 files changed, 110 insertions(+), 179 deletions(-)
>
> diff --git a/fs/dlm/config.c b/fs/dlm/config.c
> index 2beceff024e3..e55e0a2cd2e8 100644
> --- a/fs/dlm/config.c
> +++ b/fs/dlm/config.c
> @@ -664,7 +664,7 @@ static ssize_t comm_addr_store(struct config_item *item, const char *buf,
>
>         memcpy(addr, buf, len);
>
> -       rv = dlm_lowcomms_addr(cm->nodeid, addr, len);
> +       rv = dlm_midcomms_addr(cm->nodeid, addr, len);
>         if (rv) {
>                 kfree(addr);
>                 return rv;
> diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
> index c125496e03bf..24952ecf8e1a 100644
> --- a/fs/dlm/midcomms.c
> +++ b/fs/dlm/midcomms.c
> @@ -330,18 +330,23 @@ static void midcomms_node_reset(struct midcomms_node *node)
>         wake_up(&node->shutdown_wait);
>  }
>
> -static struct midcomms_node *nodeid2node(int nodeid, gfp_t alloc)
> +static struct midcomms_node *nodeid2node(int nodeid)
>  {
> -       struct midcomms_node *node, *tmp;
> -       int r = nodeid_hash(nodeid);
> +       return __find_node(nodeid, nodeid_hash(nodeid));
> +}
> +
> +int dlm_midcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
> +{
> +       int ret, r = nodeid_hash(nodeid);
> +       struct midcomms_node *node;
>
> -       node = __find_node(nodeid, r);
> -       if (node || !alloc)
> -               return node;
> +       ret = dlm_lowcomms_addr(nodeid, addr, len);
> +       if (ret)
> +               return ret;
>
> -       node = kmalloc(sizeof(*node), alloc);
> +       node = kmalloc(sizeof(*node), GFP_NOFS);
>         if (!node)
> -               return NULL;
> +               return -ENOMEM;
>
>         node->nodeid = nodeid;
>         spin_lock_init(&node->state_lock);
> @@ -353,21 +358,11 @@ static struct midcomms_node *nodeid2node(int nodeid, gfp_t alloc)
>         midcomms_node_reset(node);
>
>         spin_lock(&nodes_lock);
> -       /* check again if there was somebody else
> -        * earlier here to add the node
> -        */
> -       tmp = __find_node(nodeid, r);
> -       if (tmp) {
> -               spin_unlock(&nodes_lock);
> -               kfree(node);
> -               return tmp;
> -       }
> -
>         hlist_add_head_rcu(&node->hlist, &node_hash[r]);
>         spin_unlock(&nodes_lock);
>
>         node->debugfs = dlm_create_debug_comms_file(nodeid, node);
> -       return node;
> +       return 0;
>  }
>
>  static int dlm_send_ack(int nodeid, uint32_t seq)
> @@ -603,112 +598,6 @@ static void dlm_midcomms_receive_buffer(const union dlm_packet *p,
>         }
>  }
>
> -static struct midcomms_node *
> -dlm_midcomms_recv_node_lookup(int nodeid, const union dlm_packet *p,
> -                             uint16_t msglen, int (*cb)(struct midcomms_node *node))
> -{
> -       struct midcomms_node *node = NULL;
> -       gfp_t allocation = 0;
> -       int ret;
> -
> -       switch (p->header.h_cmd) {
> -       case DLM_RCOM:
> -               if (msglen < sizeof(struct dlm_rcom)) {
> -                       log_print("rcom msg too small: %u, will skip this message from node %d",
> -                                 msglen, nodeid);
> -                       return NULL;
> -               }
> -
> -               switch (p->rcom.rc_type) {
> -               case cpu_to_le32(DLM_RCOM_NAMES):
> -                       fallthrough;
> -               case cpu_to_le32(DLM_RCOM_NAMES_REPLY):
> -                       fallthrough;
> -               case cpu_to_le32(DLM_RCOM_STATUS):
> -                       fallthrough;
> -               case cpu_to_le32(DLM_RCOM_STATUS_REPLY):
> -                       node = nodeid2node(nodeid, 0);
> -                       if (node) {
> -                               spin_lock(&node->state_lock);
> -                               if (node->state != DLM_ESTABLISHED)
> -                                       pr_debug("receive begin RCOM msg from node %d with state %s\n",
> -                                                node->nodeid, dlm_state_str(node->state));
> -
> -                               switch (node->state) {
> -                               case DLM_CLOSED:
> -                                       node->state = DLM_ESTABLISHED;
> -                                       pr_debug("switch node %d to state %s\n",
> -                                                node->nodeid, dlm_state_str(node->state));
> -                                       break;
> -                               case DLM_ESTABLISHED:
> -                                       break;
> -                               default:
> -                                       spin_unlock(&node->state_lock);
> -                                       return NULL;
> -                               }
> -                               spin_unlock(&node->state_lock);
> -                       }
> -
> -                       allocation = GFP_NOFS;
> -                       break;
> -               default:
> -                       break;
> -               }
> -
> -               break;
> -       default:
> -               break;
> -       }
> -
> -       node = nodeid2node(nodeid, allocation);
> -       if (!node) {
> -               switch (p->header.h_cmd) {
> -               case DLM_OPTS:
> -                       if (msglen < sizeof(struct dlm_opts)) {
> -                               log_print("opts msg too small: %u, will skip this message from node %d",
> -                                         msglen, nodeid);
> -                               return NULL;
> -                       }
> -
> -                       log_print_ratelimited("received dlm opts message nextcmd %d from node %d in an invalid sequence",
> -                                             p->opts.o_nextcmd, nodeid);
> -                       break;
> -               default:
> -                       log_print_ratelimited("received dlm message cmd %d from node %d in an invalid sequence",
> -                                             p->header.h_cmd, nodeid);
> -                       break;
> -               }
> -
> -               return NULL;
> -       }
> -
> -       ret = cb(node);
> -       if (ret < 0)
> -               return NULL;
> -
> -       return node;
> -}
> -
> -static int dlm_midcomms_version_check_3_2(struct midcomms_node *node)
> -{
> -       switch (node->version) {
> -       case DLM_VERSION_NOT_SET:
> -               node->version = DLM_VERSION_3_2;
> -               wake_up(&node->shutdown_wait);
> -               log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2,
> -                         node->nodeid);
> -               break;
> -       case DLM_VERSION_3_2:
> -               break;
> -       default:
> -               log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x",
> -                                     DLM_VERSION_3_2, node->nodeid, node->version);
> -               return -1;
> -       }
> -
> -       return 0;
> -}
> -
>  static int dlm_opts_check_msglen(const union dlm_packet *p, uint16_t msglen,
>                                  int nodeid)
>  {
> @@ -767,10 +656,37 @@ static void dlm_midcomms_receive_buffer_3_2(const union dlm_packet *p, int nodei
>         int ret, idx;
>
>         idx = srcu_read_lock(&nodes_srcu);
> -       node = dlm_midcomms_recv_node_lookup(nodeid, p, msglen,
> -                                            dlm_midcomms_version_check_3_2);
> -       if (!node)
> +       node = nodeid2node(nodeid);
> +       if (WARN_ON_ONCE(!node))
> +               goto out;
> +
> +       switch (node->version) {
> +       case DLM_VERSION_NOT_SET:
> +               node->version = DLM_VERSION_3_2;
> +               wake_up(&node->shutdown_wait);
> +               log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2,
> +                         node->nodeid);
> +
> +               spin_lock(&node->state_lock);
> +               switch (node->state) {
> +               case DLM_CLOSED:
> +                       node->state = DLM_ESTABLISHED;
> +                       pr_debug("switch node %d to state %s\n",
> +                                node->nodeid, dlm_state_str(node->state));
> +                       break;
> +               default:
> +                       break;
> +               }
> +               spin_unlock(&node->state_lock);
> +
> +               break;
> +       case DLM_VERSION_3_2:
> +               break;
> +       default:
> +               log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x",
> +                                     DLM_VERSION_3_2, node->nodeid, node->version);
>                 goto out;
> +       }
>
>         switch (p->header.h_cmd) {
>         case DLM_RCOM:
> @@ -860,8 +776,19 @@ static void dlm_midcomms_receive_buffer_3_2(const union dlm_packet *p, int nodei
>         srcu_read_unlock(&nodes_srcu, idx);
>  }
>
> -static int dlm_midcomms_version_check_3_1(struct midcomms_node *node)
> +static void dlm_midcomms_receive_buffer_3_1(const union dlm_packet *p, int nodeid)
>  {
> +       uint16_t msglen = le16_to_cpu(p->header.h_length);
> +       struct midcomms_node *node;
> +       int idx;
> +
> +       idx = srcu_read_lock(&nodes_srcu);
> +       node = nodeid2node(nodeid);
> +       if (WARN_ON_ONCE(!node)) {
> +               srcu_read_unlock(&nodes_srcu, idx);
> +               return;
> +       }
> +
>         switch (node->version) {
>         case DLM_VERSION_NOT_SET:
>                 node->version = DLM_VERSION_3_1;
> @@ -874,22 +801,6 @@ static int dlm_midcomms_version_check_3_1(struct midcomms_node *node)
>         default:
>                 log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x",
>                                       DLM_VERSION_3_1, node->nodeid, node->version);
> -               return -1;
> -       }
> -
> -       return 0;
> -}
> -
> -static void dlm_midcomms_receive_buffer_3_1(const union dlm_packet *p, int nodeid)
> -{
> -       uint16_t msglen = le16_to_cpu(p->header.h_length);
> -       struct midcomms_node *node;
> -       int idx;
> -
> -       idx = srcu_read_lock(&nodes_srcu);
> -       node = dlm_midcomms_recv_node_lookup(nodeid, p, msglen,
> -                                            dlm_midcomms_version_check_3_1);
> -       if (!node) {
>                 srcu_read_unlock(&nodes_srcu, idx);
>                 return;
>         }
> @@ -1005,8 +916,8 @@ void dlm_midcomms_unack_msg_resend(int nodeid)
>         int idx, ret;
>
>         idx = srcu_read_lock(&nodes_srcu);
> -       node = nodeid2node(nodeid, 0);
> -       if (!node) {
> +       node = nodeid2node(nodeid);
> +       if (WARN_ON_ONCE(!node)) {
>                 srcu_read_unlock(&nodes_srcu, idx);
>                 return;
>         }
> @@ -1092,11 +1003,9 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
>         int idx;
>
>         idx = srcu_read_lock(&nodes_srcu);
> -       node = nodeid2node(nodeid, 0);
> -       if (!node) {
> -               WARN_ON_ONCE(1);
> +       node = nodeid2node(nodeid);
> +       if (WARN_ON_ONCE(!node))
>                 goto err;
> -       }
>
>         /* this is a bug, however we going on and hope it will be resolved */
>         WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_TX, &node->flags));
> @@ -1237,8 +1146,34 @@ void dlm_midcomms_init(void)
>         dlm_lowcomms_init();
>  }
>
> +static void midcomms_node_release(struct rcu_head *rcu)
> +{
> +       struct midcomms_node *node = container_of(rcu, struct midcomms_node, rcu);
> +
> +       WARN_ON_ONCE(atomic_read(&node->send_queue_cnt));
> +       dlm_send_queue_flush(node);
> +       kfree(node);
> +}
> +
>  void dlm_midcomms_exit(void)
>  {
> +       struct midcomms_node *node;
> +       int i, idx;
> +
> +       idx = srcu_read_lock(&nodes_srcu);
> +       for (i = 0; i < CONN_HASH_SIZE; i++) {
> +               hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
> +                       dlm_delete_debug_comms_file(node->debugfs);
> +
> +                       spin_lock(&nodes_lock);
> +                       hlist_del_rcu(&node->hlist);
> +                       spin_unlock(&nodes_lock);
> +
> +                       call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release);
> +               }
> +       }
> +       srcu_read_unlock(&nodes_srcu, idx);
> +
>         dlm_lowcomms_exit();
>  }
>
> @@ -1279,8 +1214,8 @@ void dlm_midcomms_add_member(int nodeid)
>         int idx;
>
>         idx = srcu_read_lock(&nodes_srcu);
> -       node = nodeid2node(nodeid, GFP_NOFS);
> -       if (!node) {
> +       node = nodeid2node(nodeid);
> +       if (WARN_ON_ONCE(!node)) {
>                 srcu_read_unlock(&nodes_srcu, idx);
>                 return;
>         }
> @@ -1324,8 +1259,8 @@ void dlm_midcomms_remove_member(int nodeid)
>         int idx;
>
>         idx = srcu_read_lock(&nodes_srcu);
> -       node = nodeid2node(nodeid, 0);
> -       if (!node) {
> +       node = nodeid2node(nodeid);
> +       if (WARN_ON_ONCE(!node)) {
>                 srcu_read_unlock(&nodes_srcu, idx);
>                 return;
>         }
> @@ -1369,15 +1304,6 @@ void dlm_midcomms_remove_member(int nodeid)
>         srcu_read_unlock(&nodes_srcu, idx);
>  }
>
> -static void midcomms_node_release(struct rcu_head *rcu)
> -{
> -       struct midcomms_node *node = container_of(rcu, struct midcomms_node, rcu);
> -
> -       WARN_ON_ONCE(atomic_read(&node->send_queue_cnt));
> -       dlm_send_queue_flush(node);
> -       kfree(node);
> -}
> -
>  void dlm_midcomms_version_wait(void)
>  {
>         struct midcomms_node *node;
> @@ -1440,7 +1366,7 @@ static void midcomms_shutdown(struct midcomms_node *node)
>                                  node->state == DLM_CLOSED ||
>                                  test_bit(DLM_NODE_FLAG_CLOSE, &node->flags),
>                                  DLM_SHUTDOWN_TIMEOUT);
> -       if (!ret || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags))
> +       if (!ret)
>                 pr_debug("active shutdown timed out for node %d with state %s\n",
>                          node->nodeid, dlm_state_str(node->state));
>         else
> @@ -1458,14 +1384,6 @@ void dlm_midcomms_shutdown(void)
>         for (i = 0; i < CONN_HASH_SIZE; i++) {
>                 hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
>                         midcomms_shutdown(node);
> -
> -                       dlm_delete_debug_comms_file(node->debugfs);
> -
> -                       spin_lock(&nodes_lock);
> -                       hlist_del_rcu(&node->hlist);
> -                       spin_unlock(&nodes_lock);
> -
> -                       call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release);
>                 }
>         }
>         srcu_read_unlock(&nodes_srcu, idx);
> @@ -1481,7 +1399,7 @@ int dlm_midcomms_close(int nodeid)
>
>         idx = srcu_read_lock(&nodes_srcu);
>         /* Abort pending close/remove operation */
> -       node = nodeid2node(nodeid, 0);
> +       node = nodeid2node(nodeid);
>         if (node) {
>                 /* let shutdown waiters leave */
>                 set_bit(DLM_NODE_FLAG_CLOSE, &node->flags);
> @@ -1493,7 +1411,7 @@ int dlm_midcomms_close(int nodeid)
>
>         mutex_lock(&close_lock);
>         idx = srcu_read_lock(&nodes_srcu);
> -       node = nodeid2node(nodeid, 0);
> +       node = nodeid2node(nodeid);
>         if (!node) {
>                 srcu_read_unlock(&nodes_srcu, idx);
>                 mutex_unlock(&close_lock);
> @@ -1501,9 +1419,21 @@ int dlm_midcomms_close(int nodeid)
>         }
>
>         ret = dlm_lowcomms_close(nodeid);
> -       spin_lock(&node->state_lock);
> -       midcomms_node_reset(node);
> -       spin_unlock(&node->state_lock);
> +       dlm_delete_debug_comms_file(node->debugfs);
> +
> +       spin_lock(&nodes_lock);
> +       hlist_del_rcu(&node->hlist);
> +       spin_unlock(&nodes_lock);
> +
> +       /* wait that all readers left until flush send queue */
> +       synchronize_srcu(&nodes_srcu);
> +

This will obviously deadlock when holding the nodes_srcu read lock. I
have a small fix for it by moving srcu_read_unlock(&nodes_srcu, idx);
right after we removed the node from the hash.

- Alex


  reply	other threads:[~2023-07-27 15:24 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-27 13:22 [Cluster-devel] [PATCH dlm-next 01/13] fs: dlm: add missing spin_unlock Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 02/13] fs: dlm: remove unused processed_nodes Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 03/13] fs: dlm: debugfs for queued callbacks Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 04/13] fs: dlm: check on plock ops when exit dlm Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 05/13] fs: dlm: add plock dev tracepoints Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 06/13] fs: dlm: remove clear_members_cb Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 07/13] fs: dlm: cleanup lock order Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 08/13] fs: dlm: get recovery sequence number as parameter Alexander Aring
2023-07-27 13:22 ` [Cluster-devel] [PATCH dlm-next 09/13] fs: dlm: drop rxbuf manipulation in dlm_copy_master_names Alexander Aring
2023-07-27 13:23 ` [Cluster-devel] [PATCH dlm-next 10/13] fs: dlm: drop rxbuf manipulation in dlm_recover_master_copy Alexander Aring
2023-07-27 13:23 ` [Cluster-devel] [PATCH dlm-next 11/13] fs: dlm: constify receive buffer Alexander Aring
2023-07-27 13:23 ` [Cluster-devel] [PATCH dlm-next 12/13] fs: dlm: create midcomms nodes when configure Alexander Aring
2023-07-27 15:24   ` Alexander Aring [this message]
2023-07-27 13:23 ` [Cluster-devel] [PATCH dlm-next 13/13] fs: dlm: don't use RCOM_NAMES for version detection Alexander Aring

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAK-6q+iFEp-uieJ5XoN3nzwJvBbcE3MeNyGRJx6U9D5PP7OULg@mail.gmail.com \
    --to=aahringo@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).