Netdev Archive mirror
 help / color / mirror / Atom feed
From: "Håkon Bugge" <haakon.bugge@oracle.com>
To: linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, rds-devel@oss.oracle.com
Cc: "Jason Gunthorpe" <jgg@ziepe.ca>,
	"Leon Romanovsky" <leon@kernel.org>,
	"Saeed Mahameed" <saeedm@nvidia.com>,
	"Tariq Toukan" <tariqt@nvidia.com>,
	"David S . Miller" <davem@davemloft.net>,
	"Eric Dumazet" <edumazet@google.com>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>, "Tejun Heo" <tj@kernel.org>,
	"Lai Jiangshan" <jiangshanlai@gmail.com>,
	"Allison Henderson" <allison.henderson@oracle.com>,
	"Manjunath Patil" <manjunath.b.patil@oracle.com>,
	"Mark Zhang" <markzhang@nvidia.com>,
	"Håkon Bugge" <haakon.bugge@oracle.com>,
	"Chuck Lever" <chuck.lever@oracle.com>,
	"Shiraz Saleem" <shiraz.saleem@intel.com>,
	"Yang Li" <yang.lee@linux.alibaba.com>
Subject: [PATCH v2 2/6] rds: Brute force GFP_NOIO
Date: Wed, 15 May 2024 14:53:38 +0200	[thread overview]
Message-ID: <20240515125342.1069999-3-haakon.bugge@oracle.com> (raw)
In-Reply-To: <20240515125342.1069999-1-haakon.bugge@oracle.com>

For most entry points to RDS, we call memalloc_noio_{save,restore} in
a parenthetic fashion when enabled by the module parameter force_noio.

We skip the calls to memalloc_noio_{save,restore} in rds_ioctl(), as
no memory allocations are executed in this function or its callees.

The reason we execute memalloc_noio_{save,restore} in rds_poll(), is
due to the following call chain:

rds_poll()
        poll_wait()
                __pollwait()
                        poll_get_entry()
                                __get_free_page(GFP_KERNEL)

The function rds_setsockopt() allocates memory in its callee's
rds_get_mr() and rds_get_mr_for_dest(). Hence, we need
memalloc_noio_{save,restore} in rds_setsockopt().

In rds_getsockopt(), we have rds_info_getsockopt() that allocates
memory. Hence, we need memalloc_noio_{save,restore} in
rds_getsockopt().

All the above, in order to conditionally enable RDS to become a block I/O
device.

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>

---

v1 -> v2:
   * s/EXPORT_SYMBOL/static/ for the rds_force_noio variable as
     pin-pointed by Simon
   * Straightened the reverse xmas tree two places
   * Fixed C/P error in rds_cancel_sent_to() where I had two _save()s
     and no _restore() as reported by Simon
---
 net/rds/af_rds.c | 59 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 8435a20968ef5..846ad20b3783a 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -37,10 +37,15 @@
 #include <linux/in.h>
 #include <linux/ipv6.h>
 #include <linux/poll.h>
+#include <linux/sched/mm.h>
 #include <net/sock.h>
 
 #include "rds.h"
 
+static bool rds_force_noio;
+module_param_named(force_noio, rds_force_noio, bool, 0444);
+MODULE_PARM_DESC(force_noio, "Force the use of GFP_NOIO (Y/N)");
+
 /* this is just used for stats gathering :/ */
 static DEFINE_SPINLOCK(rds_sock_lock);
 static unsigned long rds_sock_count;
@@ -59,8 +64,12 @@ DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq);
 static int rds_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
+	unsigned int noio_flags;
 	struct rds_sock *rs;
 
+	if (rds_force_noio)
+		noio_flags = memalloc_noio_save();
+
 	if (!sk)
 		goto out;
 
@@ -90,6 +99,8 @@ static int rds_release(struct socket *sock)
 	sock->sk = NULL;
 	sock_put(sk);
 out:
+	if (rds_force_noio)
+		memalloc_noio_restore(noio_flags);
 	return 0;
 }
 
@@ -214,9 +225,13 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
 {
 	struct sock *sk = sock->sk;
 	struct rds_sock *rs = rds_sk_to_rs(sk);
+	unsigned int noio_flags;
 	__poll_t mask = 0;
 	unsigned long flags;
 
+	if (rds_force_noio)
+		noio_flags = memalloc_noio_save();
+
 	poll_wait(file, sk_sleep(sk), wait);
 
 	if (rs->rs_seen_congestion)
@@ -249,6 +264,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
 	if (mask)
 		rs->rs_seen_congestion = 0;
 
+	if (rds_force_noio)
+		memalloc_noio_restore(noio_flags);
 	return mask;
 }
 
@@ -293,9 +310,13 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len)
 {
 	struct sockaddr_in6 sin6;
+	unsigned int noio_flags;
 	struct sockaddr_in sin;
 	int ret = 0;
 
+	if (rds_force_noio)
+		noio_flags = memalloc_noio_save();
+
 	/* racing with another thread binding seems ok here */
 	if (ipv6_addr_any(&rs->rs_bound_addr)) {
 		ret = -ENOTCONN; /* XXX not a great errno */
@@ -324,6 +345,8 @@ static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len)
 
 	rds_send_drop_to(rs, &sin6);
 out:
+	if (rds_force_noio)
+		memalloc_noio_restore(noio_flags);
 	return ret;
 }
 
@@ -485,8 +508,12 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
 {
 	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
 	int ret = -ENOPROTOOPT, len;
+	unsigned int noio_flags;
 	int trans;
 
+	if (rds_force_noio)
+		noio_flags = memalloc_noio_save();
+
 	if (level != SOL_RDS)
 		goto out;
 
@@ -529,6 +556,8 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
 	}
 
 out:
+	if (rds_force_noio)
+		memalloc_noio_restore(noio_flags);
 	return ret;
 
 }
@@ -538,12 +567,16 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
 {
 	struct sock *sk = sock->sk;
 	struct sockaddr_in *sin;
+	unsigned int noio_flags;
 	struct rds_sock *rs = rds_sk_to_rs(sk);
 	int ret = 0;
 
 	if (addr_len < offsetofend(struct sockaddr, sa_family))
 		return -EINVAL;
 
+	if (rds_force_noio)
+		noio_flags = memalloc_noio_save();
+
 	lock_sock(sk);
 
 	switch (uaddr->sa_family) {
@@ -626,6 +659,8 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
 	}
 
 	release_sock(sk);
+	if (rds_force_noio)
+		memalloc_noio_restore(noio_flags);
 	return ret;
 }
 
@@ -697,16 +732,28 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 static int rds_create(struct net *net, struct socket *sock, int protocol,
 		      int kern)
 {
+	unsigned int noio_flags;
 	struct sock *sk;
+	int ret;
 
 	if (sock->type != SOCK_SEQPACKET || protocol)
 		return -ESOCKTNOSUPPORT;
 
+	if (rds_force_noio)
+		noio_flags = memalloc_noio_save();
+
 	sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern);
-	if (!sk)
-		return -ENOMEM;
+	if (!sk) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
-	return __rds_create(sock, sk, protocol);
+	ret = __rds_create(sock, sk, protocol);
+out:
+	if (rds_force_noio)
+		memalloc_noio_restore(noio_flags);
+
+	return ret;
 }
 
 void rds_sock_addref(struct rds_sock *rs)
@@ -895,8 +942,12 @@ u32 rds_gen_num;
 
 static int __init rds_init(void)
 {
+	unsigned int noio_flags;
 	int ret;
 
+	if (rds_force_noio)
+		noio_flags = memalloc_noio_save();
+
 	net_get_random_once(&rds_gen_num, sizeof(rds_gen_num));
 
 	ret = rds_bind_lock_init();
@@ -947,6 +998,8 @@ static int __init rds_init(void)
 out_bind:
 	rds_bind_lock_destroy();
 out:
+	if (rds_force_noio)
+		memalloc_noio_restore(noio_flags);
 	return ret;
 }
 module_init(rds_init);
-- 
2.45.0


  parent reply	other threads:[~2024-05-15 12:54 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-15 12:53 [PATCH v2 0/6] rds: rdma: Add ability to force GFP_NOIO Håkon Bugge
2024-05-15 12:53 ` [PATCH v2 1/6] workqueue: Inherit NOIO and NOFS alloc flags Håkon Bugge
2024-05-15 16:54   ` Tejun Heo
2024-05-16 15:27     ` Haakon Bugge
2024-05-16 16:29       ` Tejun Heo
2024-05-21 14:02         ` Haakon Bugge
2024-05-15 12:53 ` Håkon Bugge [this message]
2024-05-15 12:53 ` [PATCH v2 3/6] RDMA/cma: Brute force GFP_NOIO Håkon Bugge
2024-05-16  7:37   ` Zhu Yanjun
2024-05-16 15:49     ` Haakon Bugge
2024-05-16 19:07       ` Greg Sword
2024-05-17  9:28         ` Haakon Bugge
2024-05-26  9:27         ` Leon Romanovsky
2024-05-15 12:53 ` [PATCH v2 4/6] RDMA/cm: " Håkon Bugge
2024-05-15 12:53 ` [PATCH v2 5/6] RDMA/mlx5: " Håkon Bugge
2024-05-15 12:53 ` [PATCH v2 6/6] net/mlx5: " Håkon Bugge
2024-05-21 14:24 ` [PATCH v2 0/6] rds: rdma: Add ability to " Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240515125342.1069999-3-haakon.bugge@oracle.com \
    --to=haakon.bugge@oracle.com \
    --cc=allison.henderson@oracle.com \
    --cc=chuck.lever@oracle.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=jgg@ziepe.ca \
    --cc=jiangshanlai@gmail.com \
    --cc=kuba@kernel.org \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=manjunath.b.patil@oracle.com \
    --cc=markzhang@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=rds-devel@oss.oracle.com \
    --cc=saeedm@nvidia.com \
    --cc=shiraz.saleem@intel.com \
    --cc=tariqt@nvidia.com \
    --cc=tj@kernel.org \
    --cc=yang.lee@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).