From: Matthew Wilcox <willy@infradead.org>
To: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
David Sterba <dsterba@suse.com>
Cc: linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
Jan Kara <jack@suse.cz>
Subject: Removing PG_error use from btrfs
Date: Thu, 18 Apr 2024 18:41:47 +0100 [thread overview]
Message-ID: <ZiFbWx6o-hQ38QyZ@casper.infradead.org> (raw)
We're down to just JFS and btrfs using the PG_error flag. I sent a
patch earlier to remove PG_error from JFS, so now it's your turn ...
btrfs currently uses it to indicate superblock writeback errors.
This proposal moves that information to a counter in the btrfs_device.
Maybe this isn't the best approach. What do you think?
I'm currently running fstests against it and it hasn't blown up yet.
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3d512b041977..5f6f8472ecec 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3627,28 +3627,24 @@ ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
static void btrfs_end_super_write(struct bio *bio)
{
struct btrfs_device *device = bio->bi_private;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
- struct page *page;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- page = bvec->bv_page;
+ struct folio_iter fi;
+ bio_for_each_folio_all(fi, bio) {
if (bio->bi_status) {
btrfs_warn_rl_in_rcu(device->fs_info,
- "lost page write due to IO error on %s (%d)",
+ "lost sb write due to IO error on %s (%d)",
btrfs_dev_name(device),
blk_status_to_errno(bio->bi_status));
- ClearPageUptodate(page);
- SetPageError(page);
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_WRITE_ERRS);
- } else {
- SetPageUptodate(page);
+ /* Ensure failure if a primary sb fails */
+ if (bio->bi_opf & REQ_FUA)
+ atomic_set(&device->sb_wb_errors, INT_MAX / 2);
+ else
+ atomic_inc(&device->sb_wb_errors);
}
-
- put_page(page);
- unlock_page(page);
+ folio_unlock(fi.folio);
+ folio_put(fi.folio);
}
bio_put(bio);
@@ -3750,19 +3746,21 @@ static int write_dev_supers(struct btrfs_device *device,
struct address_space *mapping = device->bdev->bd_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
- int errors = 0;
int ret;
u64 bytenr, bytenr_orig;
+ atomic_set(&device->sb_wb_errors, 0);
+
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
shash->tfm = fs_info->csum_shash;
for (i = 0; i < max_mirrors; i++) {
- struct page *page;
+ struct folio *folio;
struct bio *bio;
struct btrfs_super_block *disk_super;
+ size_t offset;
bytenr_orig = btrfs_sb_offset(i);
ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
@@ -3772,7 +3770,7 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_err(device->fs_info,
"couldn't get super block location for mirror %d",
i);
- errors++;
+ atomic_inc(&device->sb_wb_errors);
continue;
}
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
@@ -3785,20 +3783,18 @@ static int write_dev_supers(struct btrfs_device *device,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
- page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
- GFP_NOFS);
- if (!page) {
+ folio = __filemap_get_folio(mapping, bytenr >> PAGE_SHIFT,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folio)) {
btrfs_err(device->fs_info,
"couldn't get super block page for bytenr %llu",
bytenr);
- errors++;
+ atomic_inc(&device->sb_wb_errors);
continue;
}
- /* Bump the refcount for wait_dev_supers() */
- get_page(page);
-
- disk_super = page_address(page);
+ offset = offset_in_folio(folio, bytenr);
+ disk_super = folio_address(folio) + offset;
memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
/*
@@ -3812,8 +3808,7 @@ static int write_dev_supers(struct btrfs_device *device,
bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
bio->bi_private = device;
bio->bi_end_io = btrfs_end_super_write;
- __bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
- offset_in_page(bytenr));
+ bio_add_folio_nofail(bio, folio, BTRFS_SUPER_INFO_SIZE, offset);
/*
* We FUA only the first super block. The others we allow to
@@ -3825,9 +3820,9 @@ static int write_dev_supers(struct btrfs_device *device,
submit_bio(bio);
if (btrfs_advance_sb_log(device, i))
- errors++;
+ atomic_inc(&device->sb_wb_errors);
}
- return errors < i ? 0 : -1;
+ return atomic_read(&device->sb_wb_errors) < i ? 0 : -1;
}
/*
@@ -3849,7 +3844,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
for (i = 0; i < max_mirrors; i++) {
- struct page *page;
+ struct folio *folio;
ret = btrfs_sb_log_location(device, i, READ, &bytenr);
if (ret == -ENOENT) {
@@ -3864,29 +3859,19 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
device->commit_total_bytes)
break;
- page = find_get_page(device->bdev->bd_mapping,
+ folio = filemap_get_folio(device->bdev->bd_mapping,
bytenr >> PAGE_SHIFT);
- if (!page) {
- errors++;
- if (i == 0)
- primary_failed = true;
+ /* If the folio has been removed, then we know it completed */
+ if (IS_ERR(folio))
continue;
- }
- /* Page is submitted locked and unlocked once the IO completes */
- wait_on_page_locked(page);
- if (PageError(page)) {
- errors++;
- if (i == 0)
- primary_failed = true;
- }
-
- /* Drop our reference */
- put_page(page);
-
- /* Drop the reference from the writing run */
- put_page(page);
+ /* Folio is unlocked once the IO completes */
+ folio_wait_locked(folio);
+ folio_put(folio);
}
+ errors += atomic_read(&device->sb_wb_errors);
+ if (errors >= INT_MAX / 2)
+ primary_failed = true;
/* log error, force error return */
if (primary_failed) {
btrfs_err(device->fs_info, "error writing primary super block to device %llu",
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index cf555f5b47ce..44c639720426 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -142,6 +142,8 @@ struct btrfs_device {
/* type and info about this device */
u64 type;
+ atomic_t sb_wb_errors;
+
/* minimal io size for this device */
u32 sector_size;
next reply other threads:[~2024-04-18 17:41 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-18 17:41 Matthew Wilcox [this message]
2024-04-18 18:00 ` Removing PG_error use from btrfs David Sterba
2024-04-18 19:07 ` Matthew Wilcox
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZiFbWx6o-hQ38QyZ@casper.infradead.org \
--to=willy@infradead.org \
--cc=clm@fb.com \
--cc=dsterba@suse.com \
--cc=jack@suse.cz \
--cc=josef@toxicpanda.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).