From: Luis Chamberlain <mcgrof@kernel.org>
To: torvalds@linux-foundation.org, patches@lists.linux.dev
Cc: mcgrof@kernel.org
Subject: [PATCH] filemap: small read filemap_read() stack optimization
Date: Tue, 14 May 2024 01:42:21 -0700 [thread overview]
Message-ID: <20240514084221.3664475-1-mcgrof@kernel.org> (raw)
[ mcgrof: pick up 128 byte stack space (on 64-bit) filemap opt ].
https://lkml.kernel.org/r/CAHk-=wjOogaW0yLoUqQ0WfQ=etPA4cOFLy56VYCnHVU_DOMLrg@mail.gmail.com
So, just hypothetically, let's say that *before* we start using that
folio batch buffer for folios, we use it as a dummy buffer for small
reads.
So we'd make that 'fbatch' thing be a union with a temporary byte
buffer.
That hypothetical patch might look something like this TOTALLY UNTESTED
CRAP.
Anybody interested in seeing if something like this might actually
work? I do want to emphasize the "something like this".
This pile of random thoughts ends up compiling for me, and I _tried_
to think of all the cases, but there might be obvious thinkos, and
there might be things I just didn't think about at all.
I really haven't tested this AT ALL. I'm much too scared. But I don't
actually hate how the code looks nearly as much as I *thought* I'd
hate it.
Linus
[ mcgrof: posting this to allow easier testing with automation ]
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
mm/filemap.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 107 insertions(+), 10 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 30de18c4fd28..7da759e53a16 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2550,6 +2550,85 @@ static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio)
return (pos1 >> shift == pos2 >> shift);
}
+/*
+ * I can't be bothered to care about HIGHMEM for the fast read case
+ */
+#ifdef CONFIG_HIGHMEM
+#define filemap_fast_read(mapping, pos, buffer, size) 0
+#else
+
+/*
+ * Lockless copy of @size bytes at file offset @pos from the page cache
+ * of @mapping into @buffer.
+ *
+ * Called under RCU with size limited to the file size and one page
+ * (the caller rejects requests that cross a page boundary).  No folio
+ * reference is taken here; the entry is looked up, checked, copied
+ * from, and then looked up again to see whether it is still the same.
+ *
+ * Returns @size if the copy was done, or 0 if the caller has to fall
+ * back to the regular read path.  Note that @buffer may already have
+ * been written to when 0 is returned.
+ */
+static unsigned long filemap_folio_copy_rcu(struct address_space *mapping, loff_t pos, char *buffer, size_t size)
+{
+ XA_STATE(xas, &mapping->i_pages, pos >> PAGE_SHIFT);
+ struct folio *folio;
+ size_t offset;
+
+ xas_reset(&xas);
+ folio = xas_load(&xas);
+ if (xas_retry(&xas, folio))
+ return 0;
+
+ if (!folio || xa_is_value(folio))
+ return 0;
+
+ if (!folio_test_uptodate(folio))
+ return 0;
+
+ /* No fast-case if we are supposed to start readahead */
+ if (folio_test_readahead(folio))
+ return 0;
+ /* .. or mark it accessed (folio is not flagged referenced yet) */
+ if (!folio_test_referenced(folio))
+ return 0;
+
+ /* Do the data copy; pos is reduced to a byte offset inside the folio */
+ offset = pos & (folio_size(folio) - 1);
+ memcpy(buffer, folio_address(folio) + offset, size);
+
+ /*
+  * Look the entry up again after the copy: if it is no longer the
+  * folio we copied from, discard the result by returning 0.
+  *
+  * We should probably do some silly memory barrier here.
+  * NOTE(review): nothing visible here orders the memcpy() against
+  * the xas_reload() - confirm whether a read barrier is required.
+  */
+ if (unlikely(folio != xas_reload(&xas)))
+ return 0;
+
+ return size;
+}
+
+/*
+ * Iff we can complete the read completely in one atomic go under RCU,
+ * do so here. Otherwise return 0 (no partial reads, please - this is
+ * purely for the trivial fast case).
+ *
+ * @mapping: address space whose page cache is read
+ * @pos:     file offset to read from
+ * @buffer:  destination; must have room for @size bytes
+ * @size:    number of bytes requested
+ *
+ * The request is rejected (0) if it would cross a page boundary or if
+ * @pos is at or beyond i_size.  A request that runs past i_size is
+ * clamped, so the return value can be smaller than @size.
+ *
+ * Returns the number of bytes placed in @buffer, or 0 if the fast path
+ * could not be used; on 0 the contents of @buffer are undefined and the
+ * caller is expected to fall back to the normal read path.
+ */
+static unsigned long filemap_fast_read(struct address_space *mapping, loff_t pos, char *buffer, size_t size)
+{
+ struct inode *inode;
+ loff_t file_size;
+ unsigned long pgoff;
+
+ /* Don't even try for page-crossers: offset within the page plus size must fit */
+ pgoff = pos & ~PAGE_MASK;
+ if (pgoff + size > PAGE_SIZE)
+ return 0;
+
+ /* Limit it to the file size; from here on file_size is the bytes left after pos */
+ inode = mapping->host;
+ file_size = i_size_read(inode);
+ if (unlikely(pos >= file_size))
+ return 0;
+ file_size -= pos;
+ if (file_size < size)
+ size = file_size;
+
+ /* Let's see if we can just do the read under RCU (size becomes 0 if not) */
+ rcu_read_lock();
+ size = filemap_folio_copy_rcu(mapping, pos, buffer, size);
+ rcu_read_unlock();
+
+ return size;
+}
+#endif /* !HIGHMEM */
+
/**
* filemap_read - Read data from the page cache.
* @iocb: The iocb to read.
@@ -2570,7 +2649,10 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
struct file_ra_state *ra = &filp->f_ra;
struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
- struct folio_batch fbatch;
+ union {
+ struct folio_batch fbatch;
+ __DECLARE_FLEX_ARRAY(char, buffer);
+ } area;
int i, error = 0;
bool writably_mapped;
loff_t isize, end_offset;
@@ -2582,7 +2664,22 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
return 0;
iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
- folio_batch_init(&fbatch);
+
+ if (iov_iter_count(iter) < sizeof(area)) {
+ unsigned long count = iov_iter_count(iter);
+
+ count = filemap_fast_read(mapping, iocb->ki_pos, area.buffer, count);
+ if (count) {
+ size_t copied = copy_to_iter(area.buffer, count, iter);
+ if (unlikely(!copied))
+ return already_read ? already_read : -EFAULT;
+ ra->prev_pos = iocb->ki_pos += copied;
+ file_accessed(filp);
+ return copied + already_read;
+ }
+ }
+
+ folio_batch_init(&area.fbatch);
do {
cond_resched();
@@ -2598,7 +2695,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
if (unlikely(iocb->ki_pos >= i_size_read(inode)))
break;
- error = filemap_get_pages(iocb, iter->count, &fbatch, false);
+ error = filemap_get_pages(iocb, iter->count, &area.fbatch, false);
if (error < 0)
break;
@@ -2626,11 +2723,11 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
* mark it as accessed the first time.
*/
if (!pos_same_folio(iocb->ki_pos, last_pos - 1,
- fbatch.folios[0]))
- folio_mark_accessed(fbatch.folios[0]);
+ area.fbatch.folios[0]))
+ folio_mark_accessed(area.fbatch.folios[0]);
- for (i = 0; i < folio_batch_count(&fbatch); i++) {
- struct folio *folio = fbatch.folios[i];
+ for (i = 0; i < folio_batch_count(&area.fbatch); i++) {
+ struct folio *folio = area.fbatch.folios[i];
size_t fsize = folio_size(folio);
size_t offset = iocb->ki_pos & (fsize - 1);
size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
@@ -2661,9 +2758,9 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
}
}
put_folios:
- for (i = 0; i < folio_batch_count(&fbatch); i++)
- folio_put(fbatch.folios[i]);
- folio_batch_init(&fbatch);
+ for (i = 0; i < folio_batch_count(&area.fbatch); i++)
+ folio_put(area.fbatch.folios[i]);
+ folio_batch_init(&area.fbatch);
} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
file_accessed(filp);
--
2.43.0
reply other threads:[~2024-05-14 8:42 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240514084221.3664475-1-mcgrof@kernel.org \
--to=mcgrof@kernel.org \
--cc=patches@lists.linux.dev \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).