All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/7] pnfs write functionality (wave 4)
@ 2011-02-21 17:49 Fred Isaman
  2011-02-21 17:49 ` [PATCH 1/7] NFSv4.1: rearrange nfs_write_rpcsetup Fred Isaman
                   ` (6 more replies)
  0 siblings, 7 replies; 15+ messages in thread
From: Fred Isaman @ 2011-02-21 17:49 UTC (permalink / raw
  To: linux-nfs; +Cc: Trond Myklebust

These apply on top of Andy's most recent wave3 patches.

They can also be found at the branch wave4-submit-1 at
git://linux-nfs.org/~iisaman/linux-pnfs.git

These patches give write functionality to the pnfs file layout driver.
The primary limitation is that we have not yet modified the commit
code, so we can not yet handle COMMITs to the data server.  We get
around this for the moment by sending any WRITE to a data server which
does not have commit-to-mds set as a SYNCH write.  The next patchset will
address this limitation.

Fred


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/7] NFSv4.1: rearrange nfs_write_rpcsetup
  2011-02-21 17:49 [PATCH 0/7] pnfs write functionality (wave 4) Fred Isaman
@ 2011-02-21 17:49 ` Fred Isaman
  2011-02-21 17:49 ` [PATCH 2/7] NFSv4.1: add callback to nfs4_write_done Fred Isaman
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Fred Isaman @ 2011-02-21 17:49 UTC (permalink / raw
  To: linux-nfs; +Cc: Trond Myklebust

From: Andy Adamson <andros@netapp.com>

Reorder nfs_write_rpcsetup, preparing for a pnfs entry point.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/write.c |   82 +++++++++++++++++++++++++++++++------------------------
 1 files changed, 46 insertions(+), 36 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index aca0268..5604854 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -782,25 +782,21 @@ static int flush_task_priority(int how)
 	return RPC_PRIORITY_NORMAL;
 }
 
-/*
- * Set up the argument/result storage required for the RPC call.
- */
-static int nfs_write_rpcsetup(struct nfs_page *req,
-		struct nfs_write_data *data,
-		const struct rpc_call_ops *call_ops,
-		unsigned int count, unsigned int offset,
-		int how)
+static int nfs_initiate_write(struct nfs_write_data *data,
+		       struct rpc_clnt *clnt,
+		       const struct rpc_call_ops *call_ops,
+		       int how)
 {
-	struct inode *inode = req->wb_context->path.dentry->d_inode;
+	struct inode *inode = data->inode;
 	int priority = flush_task_priority(how);
 	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
-		.rpc_cred = req->wb_context->cred,
+		.rpc_cred = data->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
-		.rpc_client = NFS_CLIENT(inode),
+		.rpc_client = clnt,
 		.task = &data->task,
 		.rpc_message = &msg,
 		.callback_ops = call_ops,
@@ -811,12 +807,49 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 	};
 	int ret = 0;
 
+	/* Set up the initial task struct.  */
+	NFS_PROTO(inode)->write_setup(data, &msg);
+
+	dprintk("NFS: %5u initiated write call "
+		"(req %s/%lld, %u bytes @ offset %llu)\n",
+		data->task.tk_pid,
+		inode->i_sb->s_id,
+		(long long)NFS_FILEID(inode),
+		data->args.count,
+		(unsigned long long)data->args.offset);
+
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task)) {
+		ret = PTR_ERR(task);
+		goto out;
+	}
+	if (how & FLUSH_SYNC) {
+		ret = rpc_wait_for_completion_task(task);
+		if (ret == 0)
+			ret = task->tk_status;
+	}
+	rpc_put_task(task);
+out:
+	return ret;
+}
+
+/*
+ * Set up the argument/result storage required for the RPC call.
+ */
+static int nfs_write_rpcsetup(struct nfs_page *req,
+		struct nfs_write_data *data,
+		const struct rpc_call_ops *call_ops,
+		unsigned int count, unsigned int offset,
+		int how)
+{
+	struct inode *inode = req->wb_context->path.dentry->d_inode;
+
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 
 	data->req = req;
 	data->inode = inode = req->wb_context->path.dentry->d_inode;
-	data->cred = msg.rpc_cred;
+	data->cred = req->wb_context->cred;
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
@@ -837,30 +870,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
 
-	/* Set up the initial task struct.  */
-	NFS_PROTO(inode)->write_setup(data, &msg);
-
-	dprintk("NFS: %5u initiated write call "
-		"(req %s/%lld, %u bytes @ offset %llu)\n",
-		data->task.tk_pid,
-		inode->i_sb->s_id,
-		(long long)NFS_FILEID(inode),
-		count,
-		(unsigned long long)data->args.offset);
-
-	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task)) {
-		ret = PTR_ERR(task);
-		goto out;
-	}
-	if (how & FLUSH_SYNC) {
-		ret = rpc_wait_for_completion_task(task);
-		if (ret == 0)
-			ret = task->tk_status;
-	}
-	rpc_put_task(task);
-out:
-	return ret;
+	return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
 }
 
 /* If a nfs_flush_* function fails, it should remove reqs from @head and
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/7] NFSv4.1: add callback to nfs4_write_done
  2011-02-21 17:49 [PATCH 0/7] pnfs write functionality (wave 4) Fred Isaman
  2011-02-21 17:49 ` [PATCH 1/7] NFSv4.1: rearrange nfs_write_rpcsetup Fred Isaman
@ 2011-02-21 17:49 ` Fred Isaman
  2011-02-21 17:49 ` [PATCH 3/7] NFSv4.1: Send lseg down into nfs_write_rpcsetup Fred Isaman
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Fred Isaman @ 2011-02-21 17:49 UTC (permalink / raw
  To: linux-nfs; +Cc: Trond Myklebust

Add callback that pnfs layout driver can use to do its own handling
of data server WRITE response.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/nfs4proc.c       |   14 ++++++++++----
 include/linux/nfs_xdr.h |    1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f91e259..bfdd36a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3122,13 +3122,10 @@ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
 }
 EXPORT_SYMBOL_GPL(nfs4_reset_read);
 
-static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct inode *inode = data->inode;
 	
-	if (!nfs4_sequence_done(task, &data->res.seq_res))
-		return -EAGAIN;
-
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
 		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
 		return -EAGAIN;
@@ -3140,11 +3137,20 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+{
+	if (!nfs4_sequence_done(task, &data->res.seq_res))
+		return -EAGAIN;
+	return data->write_done_cb(task, data);
+}
+
 static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
 	struct nfs_server *server = NFS_SERVER(data->inode);
 
 	data->args.bitmask = server->cache_consistency_bitmask;
+	if (!data->write_done_cb)
+		data->write_done_cb = nfs4_write_done_cb;
 	data->res.server = server;
 	data->timestamp   = jiffies;
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index eb0e870..21cd41d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1038,6 +1038,7 @@ struct nfs_write_data {
 	unsigned int		npages;		/* Max length of pagevec */
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
+	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 #ifdef CONFIG_NFS_V4
 	unsigned long		timestamp;	/* For lease renewal */
 #endif
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/7] NFSv4.1: Send lseg down into nfs_write_rpcsetup
  2011-02-21 17:49 [PATCH 0/7] pnfs write functionality (wave 4) Fred Isaman
  2011-02-21 17:49 ` [PATCH 1/7] NFSv4.1: rearrange nfs_write_rpcsetup Fred Isaman
  2011-02-21 17:49 ` [PATCH 2/7] NFSv4.1: add callback to nfs4_write_done Fred Isaman
@ 2011-02-21 17:49 ` Fred Isaman
  2011-02-21 17:49 ` [PATCH 4/7] NFSv4.1: trigger LAYOUTGET for writes Fred Isaman
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Fred Isaman @ 2011-02-21 17:49 UTC (permalink / raw
  To: linux-nfs; +Cc: Trond Myklebust

We grab the lseg sent in from the doio function and attach it to
each struct nfs_write_data created.  This is how the lseg will be
sent to the layout driver.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/write.c          |    7 +++++--
 include/linux/nfs_xdr.h |    1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5604854..0df18ae 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -97,6 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
 
 static void nfs_writedata_release(struct nfs_write_data *wdata)
 {
+	put_lseg(wdata->lseg);
 	put_nfs_open_context(wdata->args.context);
 	nfs_writedata_free(wdata);
 }
@@ -840,6 +841,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 		struct nfs_write_data *data,
 		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset,
+		struct pnfs_layout_segment *lseg,
 		int how)
 {
 	struct inode *inode = req->wb_context->path.dentry->d_inode;
@@ -850,6 +852,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 	data->req = req;
 	data->inode = inode = req->wb_context->path.dentry->d_inode;
 	data->cred = req->wb_context->cred;
+	data->lseg = get_lseg(lseg);
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
@@ -930,7 +933,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
 		if (nbytes < wsize)
 			wsize = nbytes;
 		ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
-				   wsize, offset, how);
+					  wsize, offset, lseg, how);
 		if (ret == 0)
 			ret = ret2;
 		offset += wsize;
@@ -978,7 +981,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
 	req = nfs_list_entry(data->pages.next);
 
 	/* Set up the argument struct */
-	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
+	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
  out_bad:
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 21cd41d..09d9681 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1038,6 +1038,7 @@ struct nfs_write_data {
 	unsigned int		npages;		/* Max length of pagevec */
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
+	struct pnfs_layout_segment *lseg;
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 #ifdef CONFIG_NFS_V4
 	unsigned long		timestamp;	/* For lease renewal */
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/7] NFSv4.1: trigger LAYOUTGET for writes
  2011-02-21 17:49 [PATCH 0/7] pnfs write functionality (wave 4) Fred Isaman
                   ` (2 preceding siblings ...)
  2011-02-21 17:49 ` [PATCH 3/7] NFSv4.1: Send lseg down into nfs_write_rpcsetup Fred Isaman
@ 2011-02-21 17:49 ` Fred Isaman
  2011-02-21 18:49   ` Benny Halevy
  2011-02-21 17:49 ` [PATCH 5/7] NFSv4.1: implement generic pnfs layer write switch Fred Isaman
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 15+ messages in thread
From: Fred Isaman @ 2011-02-21 17:49 UTC (permalink / raw
  To: linux-nfs; +Cc: Trond Myklebust

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/pnfs.c  |   25 +++++++++++++++++++++++++
 fs/nfs/pnfs.h  |    7 +++++++
 fs/nfs/write.c |   32 ++++++++++++++++++++------------
 3 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a7ea646..0ed3948d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -876,6 +876,31 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
 	pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
 }
 
+static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
+			      struct nfs_page *prev,
+			      struct nfs_page *req)
+{
+	if (pgio->pg_count == prev->wb_bytes) {
+		/* This is first coelesce call for a series of nfs_pages */
+		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+						   prev->wb_context,
+						   IOMODE_RW);
+	}
+	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
+}
+
+/*
+ * rsize is already set by caller to MDS rsize.
+ */
+void
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+{
+	struct pnfs_layoutdriver_type *ld;
+
+	ld = NFS_SERVER(inode)->pnfs_curr_ld;
+	pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
+}
+
 /*
  * Call the appropriate parallel I/O subsystem read function.
  */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index acbb778..1d4e631 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -123,6 +123,7 @@ void unset_pnfs_layoutdriver(struct nfs_server *);
 enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
 					    const struct rpc_call_ops *);
 void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
+void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
 void pnfs_destroy_layout(struct nfs_inode *);
@@ -235,6 +236,12 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
 	pgio->pg_test = NULL;
 }
 
+static inline void
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
+{
+	pgio->pg_test = NULL;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0df18ae..6cf5de6 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -919,6 +919,8 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
 	} while (nbytes != 0);
 	atomic_set(&req->wb_complete, requests);
 
+	/* We know lseg==NULL */
+	lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
 	ClearPageError(page);
 	offset = 0;
 	nbytes = count;
@@ -940,6 +942,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
 		nbytes -= wsize;
 	} while (nbytes != 0);
 
+	put_lseg(lseg);
 	return ret;
 
 out_bad:
@@ -965,11 +968,18 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
 	struct nfs_page		*req;
 	struct page		**pages;
 	struct nfs_write_data	*data;
+	int ret;
 
 	data = nfs_writedata_alloc(npages);
-	if (!data)
-		goto out_bad;
-
+	if (!data) {
+		while (!list_empty(head)) {
+			req = nfs_list_entry(head->next);
+			nfs_list_remove_request(req);
+			nfs_redirty_request(req);
+		}
+		ret = -ENOMEM;
+		goto out;
+	}
 	pages = data->pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
@@ -979,16 +989,14 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
 		*pages++ = req->wb_page;
 	}
 	req = nfs_list_entry(data->pages.next);
+	if ((!lseg) && list_is_singular(&data->pages))
+		lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
 
 	/* Set up the argument struct */
-	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
- out_bad:
-	while (!list_empty(head)) {
-		req = nfs_list_entry(head->next);
-		nfs_list_remove_request(req);
-		nfs_redirty_request(req);
-	}
-	return -ENOMEM;
+	ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
+out:
+	put_lseg(lseg);
+	return ret;
 }
 
 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -996,7 +1004,7 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 {
 	size_t wsize = NFS_SERVER(inode)->wsize;
 
-	pgio->pg_test = NULL;
+	pnfs_pageio_init_write(pgio, inode);
 
 	if (wsize < PAGE_CACHE_SIZE)
 		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 5/7] NFSv4.1: implement generic pnfs layer write switch
  2011-02-21 17:49 [PATCH 0/7] pnfs write functionality (wave 4) Fred Isaman
                   ` (3 preceding siblings ...)
  2011-02-21 17:49 ` [PATCH 4/7] NFSv4.1: trigger LAYOUTGET for writes Fred Isaman
@ 2011-02-21 17:49 ` Fred Isaman
  2011-02-21 18:53   ` Benny Halevy
  2011-02-21 17:49 ` [PATCH 6/7] NFSv4.1: remove GETATTR from ds writes Fred Isaman
  2011-02-21 17:49 ` [PATCH 7/7] NFSv4.1: pnfs filelayout driver write Fred Isaman
  6 siblings, 1 reply; 15+ messages in thread
From: Fred Isaman @ 2011-02-21 17:49 UTC (permalink / raw
  To: linux-nfs; +Cc: Trond Myklebust

From: Andy Adamson <andros@netapp.com>

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com>
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Mike Sager <sager@netapp.com>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Tao Guo <guotao@nrchpc.ac.cn>
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/nfs4filelayout.c    |    7 +++++++
 fs/nfs/pnfs.c              |   24 ++++++++++++++++++++++++
 fs/nfs/pnfs.h              |   11 +++++++++++
 fs/nfs/write.c             |    4 ++++
 include/linux/nfs_iostat.h |    1 +
 include/linux/nfs_xdr.h    |    1 +
 6 files changed, 48 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index b9f8044..553adea 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -236,6 +236,12 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 	return PNFS_ATTEMPTED;
 }
 
+static enum pnfs_try_status
+filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+{
+	return PNFS_NOT_ATTEMPTED;
+}
+
 /*
  * filelayout_check_layout()
  *
@@ -453,6 +459,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
 	.free_lseg		= filelayout_free_lseg,
 	.pg_test                = filelayout_pg_test,
 	.read_pagelist          = filelayout_read_pagelist,
+	.write_pagelist         = filelayout_write_pagelist,
 };
 
 static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0ed3948d..e8e7bab 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -901,6 +901,30 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
 	pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
 }
 
+enum pnfs_try_status
+pnfs_try_to_write_data(struct nfs_write_data *wdata,
+			const struct rpc_call_ops *call_ops, int how)
+{
+	struct inode *inode = wdata->inode;
+	enum pnfs_try_status trypnfs;
+	struct nfs_server *nfss = NFS_SERVER(inode);
+
+	wdata->mds_ops = call_ops;
+
+	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
+		inode->i_ino, wdata->args.count, wdata->args.offset, how);
+
+	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
+	if (trypnfs == PNFS_NOT_ATTEMPTED) {
+		wdata->lseg = NULL;
+		put_lseg(wdata->lseg);
+	} else
+		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
+
+	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
+	return trypnfs;
+}
+
 /*
  * Call the appropriate parallel I/O subsystem read function.
  */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1d4e631..3766afea 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -79,6 +79,8 @@ struct pnfs_layoutdriver_type {
 	 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
 	 */
 	enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
+	enum pnfs_try_status
+	(*write_pagelist) (struct nfs_write_data *nfs_data, int how);
 };
 
 struct pnfs_layout_hdr {
@@ -120,6 +122,8 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
 		   enum pnfs_iomode access_type);
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
+enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
+					     const struct rpc_call_ops *, int);
 enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
 					    const struct rpc_call_ops *);
 void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
@@ -200,6 +204,13 @@ pnfs_try_to_read_data(struct nfs_read_data *data,
 	return PNFS_NOT_ATTEMPTED;
 }
 
+static inline enum pnfs_try_status
+pnfs_try_to_write_data(struct nfs_write_data *data,
+		       const struct rpc_call_ops *call_ops, int how)
+{
+	return PNFS_NOT_ATTEMPTED;
+}
+
 static inline bool
 pnfs_roc(struct inode *ino)
 {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6cf5de6..b6b683d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -873,6 +873,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
 
+	if (data->lseg &&
+	    (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
+		return 0;
+
 	return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
 }
 
diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
index 37a1437..8866bb3 100644
--- a/include/linux/nfs_iostat.h
+++ b/include/linux/nfs_iostat.h
@@ -114,6 +114,7 @@ enum nfs_stat_eventcounters {
 	NFSIOS_SHORTWRITE,
 	NFSIOS_DELAY,
 	NFSIOS_PNFS_READ,
+	NFSIOS_PNFS_WRITE,
 	__NFSIOS_COUNTSMAX,
 };
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 09d9681..c82ad33 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1039,6 +1039,7 @@ struct nfs_write_data {
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 	struct pnfs_layout_segment *lseg;
+	const struct rpc_call_ops *mds_ops;
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 #ifdef CONFIG_NFS_V4
 	unsigned long		timestamp;	/* For lease renewal */
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 6/7] NFSv4.1: remove GETATTR from ds writes
  2011-02-21 17:49 [PATCH 0/7] pnfs write functionality (wave 4) Fred Isaman
                   ` (4 preceding siblings ...)
  2011-02-21 17:49 ` [PATCH 5/7] NFSv4.1: implement generic pnfs layer write switch Fred Isaman
@ 2011-02-21 17:49 ` Fred Isaman
  2011-02-21 17:49 ` [PATCH 7/7] NFSv4.1: pnfs filelayout driver write Fred Isaman
  6 siblings, 0 replies; 15+ messages in thread
From: Fred Isaman @ 2011-02-21 17:49 UTC (permalink / raw
  To: linux-nfs; +Cc: Trond Myklebust

Any WRITE compound directed to a data server needs to have the
GETATTR calls suppressed.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/nfs4proc.c |    6 +++++-
 fs/nfs/nfs4xdr.c  |    8 +++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index bfdd36a..4ec34e2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3148,7 +3148,11 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 {
 	struct nfs_server *server = NFS_SERVER(data->inode);
 
-	data->args.bitmask = server->cache_consistency_bitmask;
+	if (data->lseg) {
+		data->args.bitmask = NULL;
+		data->res.fattr = NULL;
+	} else
+		data->args.bitmask = server->cache_consistency_bitmask;
 	if (!data->write_done_cb)
 		data->write_done_cb = nfs4_write_done_cb;
 	data->res.server = server;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2380c45..c35880c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2275,7 +2275,8 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_write(xdr, args, &hdr);
 	req->rq_snd_buf.flags |= XDRBUF_WRITE;
-	encode_getfattr(xdr, args->bitmask, &hdr);
+	if (args->bitmask)
+		encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -5694,8 +5695,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_write(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	if (res->fattr)
+		decode_getfattr(xdr, res->fattr, res->server,
+				!RPC_IS_ASYNC(rqstp->rq_task));
 	if (!status)
 		status = res->count;
 out:
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 7/7] NFSv4.1: pnfs filelayout driver write
  2011-02-21 17:49 [PATCH 0/7] pnfs write functionality (wave 4) Fred Isaman
                   ` (5 preceding siblings ...)
  2011-02-21 17:49 ` [PATCH 6/7] NFSv4.1: remove GETATTR from ds writes Fred Isaman
@ 2011-02-21 17:49 ` Fred Isaman
  2011-02-22 19:55   ` Fred Isaman
  6 siblings, 1 reply; 15+ messages in thread
From: Fred Isaman @ 2011-02-21 17:49 UTC (permalink / raw
  To: linux-nfs; +Cc: Trond Myklebust

Allows the pnfs filelayout driver to write to the data servers.

Note that COMMIT to data servers will be implemented in a future
patch.  To avoid improper behavior, for the moment any WRITE to a data
server that would also require a COMMIT to the data server is sent
NFS_FILE_SYNC.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com>
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Mingyang Guo <guomingyang@nrchpc.ac.cn>
Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/internal.h       |    5 ++
 fs/nfs/nfs4filelayout.c |   99 ++++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfs/nfs4proc.c       |   17 ++++++++
 fs/nfs/write.c          |    5 ++-
 include/linux/nfs_xdr.h |    2 +
 5 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 1a3228e..d1ddc23 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -276,6 +276,10 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 
 /* write.c */
+extern int nfs_initiate_write(struct nfs_write_data *data,
+			      struct rpc_clnt *clnt,
+			      const struct rpc_call_ops *call_ops,
+			      int how);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
@@ -291,6 +295,7 @@ extern int nfs4_init_client(struct nfs_client *clp,
 			    const char *ip_addr,
 			    rpc_authflavor_t authflavour,
 			    int noresvport);
+extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
 extern int _nfs4_call_sync(struct nfs_server *server,
 			   struct rpc_message *msg,
 			   struct nfs4_sequence_args *args,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 553adea..bfd30de 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -189,12 +189,69 @@ static void filelayout_read_release(void *data)
 	rdata->mds_ops->rpc_release(data);
 }
 
+static int filelayout_write_done_cb(struct rpc_task *task,
+				struct nfs_write_data *data)
+{
+	int reset = 0;
+
+	if (filelayout_async_handle_error(task, data->args.context->state,
+					  data->ds_clp, &reset) == -EAGAIN) {
+		struct nfs_client *clp;
+
+		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
+			__func__, data->ds_clp, data->ds_clp->cl_session);
+		if (reset) {
+			nfs4_reset_write(task, data);
+			filelayout_set_lo_fail(data->lseg);
+			clp = NFS_SERVER(data->inode)->nfs_client;
+		} else
+			clp = data->ds_clp;
+		nfs_restart_rpc(task, clp);
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+static void filelayout_write_prepare(struct rpc_task *task, void *data)
+{
+	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+	if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
+				&wdata->args.seq_args, &wdata->res.seq_res,
+				0, task))
+		return;
+
+	rpc_call_start(task);
+}
+
+static void filelayout_write_call_done(struct rpc_task *task, void *data)
+{
+	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+	/* Note this may cause RPC to be resent */
+	wdata->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_write_release(void *data)
+{
+	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+	wdata->mds_ops->rpc_release(data);
+}
+
 struct rpc_call_ops filelayout_read_call_ops = {
 	.rpc_call_prepare = filelayout_read_prepare,
 	.rpc_call_done = filelayout_read_call_done,
 	.rpc_release = filelayout_read_release,
 };
 
+struct rpc_call_ops filelayout_write_call_ops = {
+	.rpc_call_prepare = filelayout_write_prepare,
+	.rpc_call_done = filelayout_write_call_done,
+	.rpc_release = filelayout_write_release,
+};
+
 static enum pnfs_try_status
 filelayout_read_pagelist(struct nfs_read_data *data)
 {
@@ -236,10 +293,50 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 	return PNFS_ATTEMPTED;
 }
 
+/* Perform async writes. */
 static enum pnfs_try_status
 filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 {
-	return PNFS_NOT_ATTEMPTED;
+	struct pnfs_layout_segment *lseg = data->lseg;
+	struct nfs4_pnfs_ds *ds;
+	loff_t offset = data->args.offset;
+	u32 j, idx;
+	struct nfs_fh *fh;
+
+	/* Retrieve the correct rpc_client for the byte range */
+	j = nfs4_fl_calc_j_index(lseg, offset);
+	idx = nfs4_fl_calc_ds_index(lseg, j);
+	ds = nfs4_fl_prepare_ds(lseg, idx);
+	if (!ds) {
+		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+		return PNFS_NOT_ATTEMPTED;
+	}
+	dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+		data->inode->i_ino, sync, (size_t) data->args.count, offset,
+		ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+
+	/* We can't handle commit to ds yet */
+	if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
+		data->args.stable = NFS_FILE_SYNC;
+
+	data->write_done_cb = filelayout_write_done_cb;
+	data->ds_clp = ds->ds_clp;
+	fh = nfs4_fl_select_ds_fh(lseg, j);
+	if (fh)
+		data->args.fh = fh;
+	/*
+	 * Get the file offset on the dserver. Set the write offset to
+	 * this offset and save the original offset.
+	 */
+	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
+	data->mds_offset = offset;
+
+	/* Perform an asynchronous write */
+	nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
+			   &filelayout_write_call_ops, sync);
+	return PNFS_ATTEMPTED;
 }
 
 /*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4ec34e2..c2ba9c4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3144,6 +3144,23 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 	return data->write_done_cb(task, data);
 }
 
+/* Reset the the nfs_write_data to send the write to the MDS. */
+void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
+{
+	dprintk("%s Reset task for i/o through\n", __func__);
+	put_lseg(data->lseg);
+	data->lseg          = NULL;
+	data->ds_clp        = NULL;
+	data->write_done_cb = nfs4_write_done_cb;
+	data->args.fh       = NFS_FH(data->inode);
+	data->args.bitmask  = data->res.server->cache_consistency_bitmask;
+	data->args.offset   = data->mds_offset;
+	data->res.fattr     = &data->fattr;
+	task->tk_ops        = data->mds_ops;
+	rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+}
+EXPORT_SYMBOL_GPL(nfs4_reset_write);
+
 static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
 	struct nfs_server *server = NFS_SERVER(data->inode);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b6b683d..a10f120 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -783,7 +783,7 @@ static int flush_task_priority(int how)
 	return RPC_PRIORITY_NORMAL;
 }
 
-static int nfs_initiate_write(struct nfs_write_data *data,
+int nfs_initiate_write(struct nfs_write_data *data,
 		       struct rpc_clnt *clnt,
 		       const struct rpc_call_ops *call_ops,
 		       int how)
@@ -833,6 +833,7 @@ static int nfs_initiate_write(struct nfs_write_data *data,
 out:
 	return ret;
 }
+EXPORT_SYMBOL(nfs_initiate_write);
 
 /*
  * Set up the argument/result storage required for the RPC call.
@@ -1194,6 +1195,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		 */
 		static unsigned long    complain;
 
+		/* Note this will print the MDS for a DS write */
 		if (time_before(complain, jiffies)) {
 			dprintk("NFS:       faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
@@ -1214,6 +1216,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 			/* Was this an NFSv2 write or an NFSv3 stable write? */
 			if (resp->verf->committed != NFS_UNSTABLE) {
 				/* Resend from where the server left off */
+				data->mds_offset += resp->count;
 				argp->offset += resp->count;
 				argp->pgbase += resp->count;
 				argp->count -= resp->count;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c82ad33..3440f5a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1039,11 +1039,13 @@ struct nfs_write_data {
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 	struct pnfs_layout_segment *lseg;
+	struct nfs_client	*ds_clp;	/* pNFS data server */
 	const struct rpc_call_ops *mds_ops;
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 #ifdef CONFIG_NFS_V4
 	unsigned long		timestamp;	/* For lease renewal */
 #endif
+	__u64			mds_offset;	/* Filelayout dense stripe */
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
 };
 
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/7] NFSv4.1: trigger LAYOUTGET for writes
  2011-02-21 17:49 ` [PATCH 4/7] NFSv4.1: trigger LAYOUTGET for writes Fred Isaman
@ 2011-02-21 18:49   ` Benny Halevy
  2011-02-22  1:06     ` Fred Isaman
  2011-02-22  1:27     ` Trond Myklebust
  0 siblings, 2 replies; 15+ messages in thread
From: Benny Halevy @ 2011-02-21 18:49 UTC (permalink / raw
  To: Fred Isaman; +Cc: linux-nfs, Trond Myklebust

On 2011-02-21 09:49, Fred Isaman wrote:
> Signed-off-by: Fred Isaman <iisaman@netapp.com>
> ---
>  fs/nfs/pnfs.c  |   25 +++++++++++++++++++++++++
>  fs/nfs/pnfs.h  |    7 +++++++
>  fs/nfs/write.c |   32 ++++++++++++++++++++------------
>  3 files changed, 52 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index a7ea646..0ed3948d 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -876,6 +876,31 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
>  	pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
>  }
>  
> +static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
> +			      struct nfs_page *prev,
> +			      struct nfs_page *req)
> +{
> +	if (pgio->pg_count == prev->wb_bytes) {
> +		/* This is first coelesce call for a series of nfs_pages */
> +		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
> +						   prev->wb_context,
> +						   IOMODE_RW);
> +	}
> +	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
> +}
> +
> +/*
> + * rsize is already set by caller to MDS rsize.
> + */

This comment is confusing and looks out of place...

> +void
> +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
> +{
> +	struct pnfs_layoutdriver_type *ld;
> +
> +	ld = NFS_SERVER(inode)->pnfs_curr_ld;
> +	pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
> +}
> +
>  /*
>   * Call the appropriate parallel I/O subsystem read function.
>   */
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index acbb778..1d4e631 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -123,6 +123,7 @@ void unset_pnfs_layoutdriver(struct nfs_server *);
>  enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
>  					    const struct rpc_call_ops *);
>  void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
> +void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
>  int pnfs_layout_process(struct nfs4_layoutget *lgp);
>  void pnfs_free_lseg_list(struct list_head *tmp_list);
>  void pnfs_destroy_layout(struct nfs_inode *);
> @@ -235,6 +236,12 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
>  	pgio->pg_test = NULL;
>  }
>  
> +static inline void
> +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
> +{
> +	pgio->pg_test = NULL;
> +}
> +
>  #endif /* CONFIG_NFS_V4_1 */
>  
>  #endif /* FS_NFS_PNFS_H */
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 0df18ae..6cf5de6 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -919,6 +919,8 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
>  	} while (nbytes != 0);
>  	atomic_set(&req->wb_complete, requests);
>  
> +	/* We know lseg==NULL */

Then maybe BUG_ON or WARN_ON(lseg != NULL)?

> +	lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
>  	ClearPageError(page);
>  	offset = 0;
>  	nbytes = count;
> @@ -940,6 +942,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
>  		nbytes -= wsize;
>  	} while (nbytes != 0);
>  
> +	put_lseg(lseg);
>  	return ret;
>  
>  out_bad:
> @@ -965,11 +968,18 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
>  	struct nfs_page		*req;
>  	struct page		**pages;
>  	struct nfs_write_data	*data;
> +	int ret;
>  
>  	data = nfs_writedata_alloc(npages);
> -	if (!data)
> -		goto out_bad;
> -
> +	if (!data) {
> +		while (!list_empty(head)) {
> +			req = nfs_list_entry(head->next);

nit: it'd be cleaner to define a nfs_list_first_entry helper in nfs_page.h
rather than using a combination of list helpers and open code (head->next).

> +			nfs_list_remove_request(req);
> +			nfs_redirty_request(req);
> +		}
> +		ret = -ENOMEM;
> +		goto out;
> +	}
>  	pages = data->pagevec;
>  	while (!list_empty(head)) {
>  		req = nfs_list_entry(head->next);
> @@ -979,16 +989,14 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
>  		*pages++ = req->wb_page;
>  	}
>  	req = nfs_list_entry(data->pages.next);
> +	if ((!lseg) && list_is_singular(&data->pages))
> +		lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
>  
>  	/* Set up the argument struct */
> -	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
> - out_bad:
> -	while (!list_empty(head)) {
> -		req = nfs_list_entry(head->next);
> -		nfs_list_remove_request(req);
> -		nfs_redirty_request(req);
> -	}
> -	return -ENOMEM;
> +	ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
> +out:
> +	put_lseg(lseg);

Shouldn't you do that only if lseg was updated above?

Benny

> +	return ret;
>  }
>  
>  static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
> @@ -996,7 +1004,7 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
>  {
>  	size_t wsize = NFS_SERVER(inode)->wsize;
>  
> -	pgio->pg_test = NULL;
> +	pnfs_pageio_init_write(pgio, inode);
>  
>  	if (wsize < PAGE_CACHE_SIZE)
>  		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/7] NFSv4.1: implement generic pnfs layer write switch
  2011-02-21 17:49 ` [PATCH 5/7] NFSv4.1: implement generic pnfs layer write switch Fred Isaman
@ 2011-02-21 18:53   ` Benny Halevy
  2011-02-22  1:17     ` Fred Isaman
  0 siblings, 1 reply; 15+ messages in thread
From: Benny Halevy @ 2011-02-21 18:53 UTC (permalink / raw
  To: Fred Isaman; +Cc: linux-nfs, Trond Myklebust

On 2011-02-21 09:49, Fred Isaman wrote:
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 1d4e631..3766afea 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -79,6 +79,8 @@ struct pnfs_layoutdriver_type {
>  	 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
>  	 */
>  	enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
> +	enum pnfs_try_status
> +	(*write_pagelist) (struct nfs_write_data *nfs_data, int how);

nit: line break

>  };
>  
>  struct pnfs_layout_hdr {
> @@ -120,6 +122,8 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
>  		   enum pnfs_iomode access_type);
>  void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
>  void unset_pnfs_layoutdriver(struct nfs_server *);
> +enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
> +					     const struct rpc_call_ops *, int);
>  enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
>  					    const struct rpc_call_ops *);
>  void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
> @@ -200,6 +204,13 @@ pnfs_try_to_read_data(struct nfs_read_data *data,
>  	return PNFS_NOT_ATTEMPTED;
>  }
>  
> +static inline enum pnfs_try_status
> +pnfs_try_to_write_data(struct nfs_write_data *data,
> +		       const struct rpc_call_ops *call_ops, int how)
> +{
> +	return PNFS_NOT_ATTEMPTED;
> +}
> +
>  static inline bool
>  pnfs_roc(struct inode *ino)
>  {
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 6cf5de6..b6b683d 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -873,6 +873,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
>  	data->res.verf    = &data->verf;
>  	nfs_fattr_init(&data->fattr);
>  
> +	if (data->lseg &&
> +	    (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
> +		return 0;
> +
>  	return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
>  }
>  
> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
> index 37a1437..8866bb3 100644
> --- a/include/linux/nfs_iostat.h
> +++ b/include/linux/nfs_iostat.h
> @@ -114,6 +114,7 @@ enum nfs_stat_eventcounters {
>  	NFSIOS_SHORTWRITE,
>  	NFSIOS_DELAY,
>  	NFSIOS_PNFS_READ,
> +	NFSIOS_PNFS_WRITE,
>  	__NFSIOS_COUNTSMAX,
>  };
>  
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 09d9681..c82ad33 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -1039,6 +1039,7 @@ struct nfs_write_data {
>  	struct nfs_writeargs	args;		/* argument struct */
>  	struct nfs_writeres	res;		/* result struct */
>  	struct pnfs_layout_segment *lseg;
> +	const struct rpc_call_ops *mds_ops;

nit: mds_ops is not really used in the patch, just initialized.
better introduce it along with its usage in patch 7/7.

Benny

>  	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
>  #ifdef CONFIG_NFS_V4
>  	unsigned long		timestamp;	/* For lease renewal */

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/7] NFSv4.1: trigger LAYOUTGET for writes
  2011-02-21 18:49   ` Benny Halevy
@ 2011-02-22  1:06     ` Fred Isaman
  2011-02-22  2:14       ` Benny Halevy
  2011-02-22  1:27     ` Trond Myklebust
  1 sibling, 1 reply; 15+ messages in thread
From: Fred Isaman @ 2011-02-22  1:06 UTC (permalink / raw
  To: Benny Halevy; +Cc: linux-nfs, Trond Myklebust


On Feb 21, 2011, at 10:49 AM, Benny Halevy wrote:

> On 2011-02-21 09:49, Fred Isaman wrote:
>> Signed-off-by: Fred Isaman <iisaman@netapp.com>
>> ---
>> fs/nfs/pnfs.c  |   25 +++++++++++++++++++++++++
>> fs/nfs/pnfs.h  |    7 +++++++
>> fs/nfs/write.c |   32 ++++++++++++++++++++------------
>> 3 files changed, 52 insertions(+), 12 deletions(-)
>> 
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index a7ea646..0ed3948d 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -876,6 +876,31 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
>> 	pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
>> }
>> 
>> +static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
>> +			      struct nfs_page *prev,
>> +			      struct nfs_page *req)
>> +{
>> +	if (pgio->pg_count == prev->wb_bytes) {
>> +		/* This is first coelesce call for a series of nfs_pages */
>> +		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>> +						   prev->wb_context,
>> +						   IOMODE_RW);
>> +	}
>> +	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
>> +}
>> +
>> +/*
>> + * rsize is already set by caller to MDS rsize.
>> + */
> 
> This comment is confusing and looks out of place...

OK.  I suggest we  remove similar comment in the read code too.

> 
>> +void
>> +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
>> +{
>> +	struct pnfs_layoutdriver_type *ld;
>> +
>> +	ld = NFS_SERVER(inode)->pnfs_curr_ld;
>> +	pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
>> +}
>> +
>> /*
>>  * Call the appropriate parallel I/O subsystem read function.
>>  */
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index acbb778..1d4e631 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -123,6 +123,7 @@ void unset_pnfs_layoutdriver(struct nfs_server *);
>> enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
>> 					    const struct rpc_call_ops *);
>> void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
>> +void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
>> int pnfs_layout_process(struct nfs4_layoutget *lgp);
>> void pnfs_free_lseg_list(struct list_head *tmp_list);
>> void pnfs_destroy_layout(struct nfs_inode *);
>> @@ -235,6 +236,12 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
>> 	pgio->pg_test = NULL;
>> }
>> 
>> +static inline void
>> +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
>> +{
>> +	pgio->pg_test = NULL;
>> +}
>> +
>> #endif /* CONFIG_NFS_V4_1 */
>> 
>> #endif /* FS_NFS_PNFS_H */
>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
>> index 0df18ae..6cf5de6 100644
>> --- a/fs/nfs/write.c
>> +++ b/fs/nfs/write.c
>> @@ -919,6 +919,8 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
>> 	} while (nbytes != 0);
>> 	atomic_set(&req->wb_complete, requests);
>> 
>> +	/* We know lseg==NULL */
> 
> Then maybe BUG_ON or WARN_ON(lseg != NULL)?

OK.


> 
>> +	lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
>> 	ClearPageError(page);
>> 	offset = 0;
>> 	nbytes = count;
>> @@ -940,6 +942,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
>> 		nbytes -= wsize;
>> 	} while (nbytes != 0);
>> 
>> +	put_lseg(lseg);
>> 	return ret;
>> 
>> out_bad:
>> @@ -965,11 +968,18 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
>> 	struct nfs_page		*req;
>> 	struct page		**pages;
>> 	struct nfs_write_data	*data;
>> +	int ret;
>> 
>> 	data = nfs_writedata_alloc(npages);
>> -	if (!data)
>> -		goto out_bad;
>> -
>> +	if (!data) {
>> +		while (!list_empty(head)) {
>> +			req = nfs_list_entry(head->next);
> 
> nit: it'd be cleaner to define a nfs_list_first_entry helper in nfs_page.h
> rather than using a combination of list helpers and open code (head->next).

I would think that would be a separate patch. As this is just a simple move of pre-existing code,
it is easier to see what is happening if I leave the copied code the same.


> 
>> +			nfs_list_remove_request(req);
>> +			nfs_redirty_request(req);
>> +		}
>> +		ret = -ENOMEM;
>> +		goto out;
>> +	}
>> 	pages = data->pagevec;
>> 	while (!list_empty(head)) {
>> 		req = nfs_list_entry(head->next);
>> @@ -979,16 +989,14 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
>> 		*pages++ = req->wb_page;
>> 	}
>> 	req = nfs_list_entry(data->pages.next);
>> +	if ((!lseg) && list_is_singular(&data->pages))
>> +		lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
>> 
>> 	/* Set up the argument struct */
>> -	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
>> - out_bad:
>> -	while (!list_empty(head)) {
>> -		req = nfs_list_entry(head->next);
>> -		nfs_list_remove_request(req);
>> -		nfs_redirty_request(req);
>> -	}
>> -	return -ENOMEM;
>> +	ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
>> +out:
>> +	put_lseg(lseg);
> 
> Shouldn't you do that only if lseg was updated above?
> 

No, this is the put matching the get done in the pg_test function.  I'll comment that.

Fred


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/7] NFSv4.1: implement generic pnfs layer write switch
  2011-02-21 18:53   ` Benny Halevy
@ 2011-02-22  1:17     ` Fred Isaman
  0 siblings, 0 replies; 15+ messages in thread
From: Fred Isaman @ 2011-02-22  1:17 UTC (permalink / raw
  To: Benny Halevy; +Cc: linux-nfs, Trond Myklebust


On Feb 21, 2011, at 10:53 AM, Benny Halevy wrote:

> On 2011-02-21 09:49, Fred Isaman wrote:
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index 1d4e631..3766afea 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -79,6 +79,8 @@ struct pnfs_layoutdriver_type {
>> 	 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
>> 	 */
>> 	enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
>> +	enum pnfs_try_status
>> +	(*write_pagelist) (struct nfs_write_data *nfs_data, int how);
> 
> nit: line break

OK


> 
>> };
>> 
>> struct pnfs_layout_hdr {
>> @@ -120,6 +122,8 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
>> 		   enum pnfs_iomode access_type);
>> void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
>> void unset_pnfs_layoutdriver(struct nfs_server *);
>> +enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
>> +					     const struct rpc_call_ops *, int);
>> enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
>> 					    const struct rpc_call_ops *);
>> void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
>> @@ -200,6 +204,13 @@ pnfs_try_to_read_data(struct nfs_read_data *data,
>> 	return PNFS_NOT_ATTEMPTED;
>> }
>> 
>> +static inline enum pnfs_try_status
>> +pnfs_try_to_write_data(struct nfs_write_data *data,
>> +		       const struct rpc_call_ops *call_ops, int how)
>> +{
>> +	return PNFS_NOT_ATTEMPTED;
>> +}
>> +
>> static inline bool
>> pnfs_roc(struct inode *ino)
>> {
>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
>> index 6cf5de6..b6b683d 100644
>> --- a/fs/nfs/write.c
>> +++ b/fs/nfs/write.c
>> @@ -873,6 +873,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
>> 	data->res.verf    = &data->verf;
>> 	nfs_fattr_init(&data->fattr);
>> 
>> +	if (data->lseg &&
>> +	    (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
>> +		return 0;
>> +
>> 	return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
>> }
>> 
>> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
>> index 37a1437..8866bb3 100644
>> --- a/include/linux/nfs_iostat.h
>> +++ b/include/linux/nfs_iostat.h
>> @@ -114,6 +114,7 @@ enum nfs_stat_eventcounters {
>> 	NFSIOS_SHORTWRITE,
>> 	NFSIOS_DELAY,
>> 	NFSIOS_PNFS_READ,
>> +	NFSIOS_PNFS_WRITE,
>> 	__NFSIOS_COUNTSMAX,
>> };
>> 
>> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
>> index 09d9681..c82ad33 100644
>> --- a/include/linux/nfs_xdr.h
>> +++ b/include/linux/nfs_xdr.h
>> @@ -1039,6 +1039,7 @@ struct nfs_write_data {
>> 	struct nfs_writeargs	args;		/* argument struct */
>> 	struct nfs_writeres	res;		/* result struct */
>> 	struct pnfs_layout_segment *lseg;
>> +	const struct rpc_call_ops *mds_ops;
> 
> nit: mds_ops is not really used in the patch, just initialized.
> better introduce it along with its usage in patch 7/7.

OK.

Fred

> 
> Benny
> 
>> 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
>> #ifdef CONFIG_NFS_V4
>> 	unsigned long		timestamp;	/* For lease renewal */


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/7] NFSv4.1: trigger LAYOUTGET for writes
  2011-02-21 18:49   ` Benny Halevy
  2011-02-22  1:06     ` Fred Isaman
@ 2011-02-22  1:27     ` Trond Myklebust
  1 sibling, 0 replies; 15+ messages in thread
From: Trond Myklebust @ 2011-02-22  1:27 UTC (permalink / raw
  To: Benny Halevy; +Cc: Fred Isaman, linux-nfs

On Mon, 2011-02-21 at 10:49 -0800, Benny Halevy wrote: 
> On 2011-02-21 09:49, Fred Isaman wrote:
> > @@ -965,11 +968,18 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
> >  	struct nfs_page		*req;
> >  	struct page		**pages;
> >  	struct nfs_write_data	*data;
> > +	int ret;
> >  
> >  	data = nfs_writedata_alloc(npages);
> > -	if (!data)
> > -		goto out_bad;
> > -
> > +	if (!data) {
> > +		while (!list_empty(head)) {
> > +			req = nfs_list_entry(head->next);
> 
> nit: it'd be cleaner to define a nfs_list_first_entry helper in nfs_page.h
> rather than using a combination of list helpers and open code (head->next).

No thanks! Let's not add lots of helpers for trivial stuff unless that
results in a clear gain in type safety.

   Trond

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/7] NFSv4.1: trigger LAYOUTGET for writes
  2011-02-22  1:06     ` Fred Isaman
@ 2011-02-22  2:14       ` Benny Halevy
  0 siblings, 0 replies; 15+ messages in thread
From: Benny Halevy @ 2011-02-22  2:14 UTC (permalink / raw
  To: Fred Isaman; +Cc: linux-nfs, Trond Myklebust

On 2011-02-21 17:06, Fred Isaman wrote:
> 
> On Feb 21, 2011, at 10:49 AM, Benny Halevy wrote:
> 
>> On 2011-02-21 09:49, Fred Isaman wrote:
>>> Signed-off-by: Fred Isaman <iisaman@netapp.com>
>>> ---
>>> fs/nfs/pnfs.c  |   25 +++++++++++++++++++++++++
>>> fs/nfs/pnfs.h  |    7 +++++++
>>> fs/nfs/write.c |   32 ++++++++++++++++++++------------
>>> 3 files changed, 52 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>> index a7ea646..0ed3948d 100644
>>> --- a/fs/nfs/pnfs.c
>>> +++ b/fs/nfs/pnfs.c
>>> @@ -876,6 +876,31 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
>>> 	pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
>>> }
>>>
>>> +static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
>>> +			      struct nfs_page *prev,
>>> +			      struct nfs_page *req)
>>> +{
>>> +	if (pgio->pg_count == prev->wb_bytes) {
>>> +		/* This is first coelesce call for a series of nfs_pages */
>>> +		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>>> +						   prev->wb_context,
>>> +						   IOMODE_RW);
>>> +	}
>>> +	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
>>> +}
>>> +
>>> +/*
>>> + * rsize is already set by caller to MDS rsize.
>>> + */
>>
>> This comment is confusing and looks out of place...
> 
> OK.  I suggest we  remove similar comment in the read code too.
> 

Yeah.  Works for me.

>>
>>> +void
>>> +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
>>> +{
>>> +	struct pnfs_layoutdriver_type *ld;
>>> +
>>> +	ld = NFS_SERVER(inode)->pnfs_curr_ld;
>>> +	pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
>>> +}
>>> +
>>> /*
>>>  * Call the appropriate parallel I/O subsystem read function.
>>>  */
>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>> index acbb778..1d4e631 100644
>>> --- a/fs/nfs/pnfs.h
>>> +++ b/fs/nfs/pnfs.h
>>> @@ -123,6 +123,7 @@ void unset_pnfs_layoutdriver(struct nfs_server *);
>>> enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
>>> 					    const struct rpc_call_ops *);
>>> void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
>>> +void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
>>> int pnfs_layout_process(struct nfs4_layoutget *lgp);
>>> void pnfs_free_lseg_list(struct list_head *tmp_list);
>>> void pnfs_destroy_layout(struct nfs_inode *);
>>> @@ -235,6 +236,12 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
>>> 	pgio->pg_test = NULL;
>>> }
>>>
>>> +static inline void
>>> +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
>>> +{
>>> +	pgio->pg_test = NULL;
>>> +}
>>> +
>>> #endif /* CONFIG_NFS_V4_1 */
>>>
>>> #endif /* FS_NFS_PNFS_H */
>>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
>>> index 0df18ae..6cf5de6 100644
>>> --- a/fs/nfs/write.c
>>> +++ b/fs/nfs/write.c
>>> @@ -919,6 +919,8 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
>>> 	} while (nbytes != 0);
>>> 	atomic_set(&req->wb_complete, requests);
>>>
>>> +	/* We know lseg==NULL */
>>
>> Then maybe BUG_ON or WARN_ON(lseg != NULL)?
> 
> OK.
> 
> 
>>
>>> +	lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
>>> 	ClearPageError(page);
>>> 	offset = 0;
>>> 	nbytes = count;
>>> @@ -940,6 +942,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
>>> 		nbytes -= wsize;
>>> 	} while (nbytes != 0);
>>>
>>> +	put_lseg(lseg);
>>> 	return ret;
>>>
>>> out_bad:
>>> @@ -965,11 +968,18 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
>>> 	struct nfs_page		*req;
>>> 	struct page		**pages;
>>> 	struct nfs_write_data	*data;
>>> +	int ret;
>>>
>>> 	data = nfs_writedata_alloc(npages);
>>> -	if (!data)
>>> -		goto out_bad;
>>> -
>>> +	if (!data) {
>>> +		while (!list_empty(head)) {
>>> +			req = nfs_list_entry(head->next);
>>
>> nit: it'd be cleaner to define a nfs_list_first_entry helper in nfs_page.h
>> rather than using a combination of list helpers and open code (head->next).
> 
> I would think that would be a separate patch. As this is just a simple move of pre-existing code,
> it is easier to see what is happening if I leave the copied code the same.
> 
> 

OK, makes sense.

Benny

>>
>>> +			nfs_list_remove_request(req);
>>> +			nfs_redirty_request(req);
>>> +		}
>>> +		ret = -ENOMEM;
>>> +		goto out;
>>> +	}
>>> 	pages = data->pagevec;
>>> 	while (!list_empty(head)) {
>>> 		req = nfs_list_entry(head->next);
>>> @@ -979,16 +989,14 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
>>> 		*pages++ = req->wb_page;
>>> 	}
>>> 	req = nfs_list_entry(data->pages.next);
>>> +	if ((!lseg) && list_is_singular(&data->pages))
>>> +		lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
>>>
>>> 	/* Set up the argument struct */
>>> -	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
>>> - out_bad:
>>> -	while (!list_empty(head)) {
>>> -		req = nfs_list_entry(head->next);
>>> -		nfs_list_remove_request(req);
>>> -		nfs_redirty_request(req);
>>> -	}
>>> -	return -ENOMEM;
>>> +	ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
>>> +out:
>>> +	put_lseg(lseg);
>>
>> Shouldn't you do that only if lseg was updated above?
>>
> 
> No, this is the put matching the get done in the pg_test function.  I'll comment that.
> 
> Fred
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 7/7] NFSv4.1: pnfs filelayout driver write
  2011-02-21 17:49 ` [PATCH 7/7] NFSv4.1: pnfs filelayout driver write Fred Isaman
@ 2011-02-22 19:55   ` Fred Isaman
  0 siblings, 0 replies; 15+ messages in thread
From: Fred Isaman @ 2011-02-22 19:55 UTC (permalink / raw
  To: linux-nfs; +Cc: Trond Myklebust

On Mon, Feb 21, 2011 at 9:49 AM, Fred Isaman <iisaman@netapp.com> wrote:
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index b6b683d..a10f120 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -783,7 +783,7 @@ static int flush_task_priority(int how)
>        return RPC_PRIORITY_NORMAL;
>  }
>
> -static int nfs_initiate_write(struct nfs_write_data *data,
> +int nfs_initiate_write(struct nfs_write_data *data,
>                       struct rpc_clnt *clnt,
>                       const struct rpc_call_ops *call_ops,
>                       int how)
> @@ -833,6 +833,7 @@ static int nfs_initiate_write(struct nfs_write_data *data,
>  out:
>        return ret;
>  }
> +EXPORT_SYMBOL(nfs_initiate_write);
>

This should be EXPORT_SYMBOL_GPL.

Fred

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2011-02-22 19:55 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-02-21 17:49 [PATCH 0/7] pnfs write functionality (wave 4) Fred Isaman
2011-02-21 17:49 ` [PATCH 1/7] NFSv4.1: rearrange nfs_write_rpcsetup Fred Isaman
2011-02-21 17:49 ` [PATCH 2/7] NFSv4.1: add callback to nfs4_write_done Fred Isaman
2011-02-21 17:49 ` [PATCH 3/7] NFSv4.1: Send lseg down into nfs_write_rpcsetup Fred Isaman
2011-02-21 17:49 ` [PATCH 4/7] NFSv4.1: trigger LAYOUTGET for writes Fred Isaman
2011-02-21 18:49   ` Benny Halevy
2011-02-22  1:06     ` Fred Isaman
2011-02-22  2:14       ` Benny Halevy
2011-02-22  1:27     ` Trond Myklebust
2011-02-21 17:49 ` [PATCH 5/7] NFSv4.1: implement generic pnfs layer write switch Fred Isaman
2011-02-21 18:53   ` Benny Halevy
2011-02-22  1:17     ` Fred Isaman
2011-02-21 17:49 ` [PATCH 6/7] NFSv4.1: remove GETATTR from ds writes Fred Isaman
2011-02-21 17:49 ` [PATCH 7/7] NFSv4.1: pnfs filelayout driver write Fred Isaman
2011-02-22 19:55   ` Fred Isaman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.