[PATCH 3/3] xfs_repair: rebuild block mappings from rmapbt data

All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed

From: "Darrick J. Wong" <djwong@kernel.org>
To: cem@kernel.org, djwong@kernel.org
Cc: Christoph Hellwig <hch@lst.de>,
	Bill O'Donnell <bodonnel@redhat.com>,
	linux-xfs@vger.kernel.org
Subject: [PATCH 3/3] xfs_repair: rebuild block mappings from rmapbt data
Date: Wed, 17 Apr 2024 14:44:28 -0700	[thread overview]
Message-ID: <171338845464.1856515.6344335951685084221.stgit@frogsfrogsfrogs> (raw)
In-Reply-To: <171338845416.1856515.8750904442149650753.stgit@frogsfrogsfrogs>

From: Darrick J. Wong <djwong@kernel.org>

Use rmap records to rebuild corrupt inode forks instead of zapping
the whole inode if we think the rmap data is reasonably sane.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bill O'Donnell <bodonnel@redhat.com>
---
 include/xfs_trans.h      |    2 
 libfrog/util.h           |    5 
 libxfs/libxfs_api_defs.h |   15 +
 libxfs/trans.c           |   48 +++
 repair/Makefile          |    2 
 repair/agbtree.c         |    2 
 repair/bmap_repair.c     |  748 ++++++++++++++++++++++++++++++++++++++++++++++
 repair/bmap_repair.h     |   13 +
 repair/bulkload.c        |  205 ++++++++++++-
 repair/bulkload.h        |   24 +
 repair/dinode.c          |   54 +++
 repair/rmap.c            |    2 
 repair/rmap.h            |    1 
 13 files changed, 1110 insertions(+), 11 deletions(-)
 create mode 100644 repair/bmap_repair.c
 create mode 100644 repair/bmap_repair.h


diff --git a/include/xfs_trans.h b/include/xfs_trans.h
index ab298ccfe..ac82c3bc4 100644
--- a/include/xfs_trans.h
+++ b/include/xfs_trans.h
@@ -98,6 +98,8 @@ int	libxfs_trans_alloc_rollable(struct xfs_mount *mp, uint blocks,
 int	libxfs_trans_alloc_empty(struct xfs_mount *mp, struct xfs_trans **tpp);
 int	libxfs_trans_commit(struct xfs_trans *);
 void	libxfs_trans_cancel(struct xfs_trans *);
+int	libxfs_trans_reserve_more(struct xfs_trans *tp, uint blocks,
+			uint rtextents);
 
 /* cancel dfops associated with a transaction */
 void xfs_defer_cancel(struct xfs_trans *);
diff --git a/libfrog/util.h b/libfrog/util.h
index 1b97881bf..5df95e69c 100644
--- a/libfrog/util.h
+++ b/libfrog/util.h
@@ -8,4 +8,9 @@
 
 unsigned int	log2_roundup(unsigned int i);
 
+#define min_t(type,x,y) \
+	({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+#define max_t(type,x,y) \
+	({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+
 #endif /* __LIBFROG_UTIL_H__ */
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 28960317a..769733ec2 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -32,7 +32,7 @@
 #define xfs_alloc_fix_freelist		libxfs_alloc_fix_freelist
 #define xfs_alloc_min_freelist		libxfs_alloc_min_freelist
 #define xfs_alloc_read_agf		libxfs_alloc_read_agf
-#define xfs_alloc_vextent		libxfs_alloc_vextent
+#define xfs_alloc_vextent_start_ag	libxfs_alloc_vextent_start_ag
 
 #define xfs_ascii_ci_hashname		libxfs_ascii_ci_hashname
 
@@ -44,11 +44,18 @@
 #define xfs_attr_shortform_verify	libxfs_attr_shortform_verify
 
 #define __xfs_bmap_add_free		__libxfs_bmap_add_free
+#define xfs_bmap_validate_extent	libxfs_bmap_validate_extent
 #define xfs_bmapi_read			libxfs_bmapi_read
+#define xfs_bmapi_remap			libxfs_bmapi_remap
 #define xfs_bmapi_write			libxfs_bmapi_write
 #define xfs_bmap_last_offset		libxfs_bmap_last_offset
+#define xfs_bmbt_calc_size		libxfs_bmbt_calc_size
+#define xfs_bmbt_commit_staged_btree	libxfs_bmbt_commit_staged_btree
+#define xfs_bmbt_disk_get_startoff	libxfs_bmbt_disk_get_startoff
+#define xfs_bmbt_disk_set_all		libxfs_bmbt_disk_set_all
 #define xfs_bmbt_maxlevels_ondisk	libxfs_bmbt_maxlevels_ondisk
 #define xfs_bmbt_maxrecs		libxfs_bmbt_maxrecs
+#define xfs_bmbt_stage_cursor		libxfs_bmbt_stage_cursor
 #define xfs_bmdr_maxrecs		libxfs_bmdr_maxrecs
 
 #define xfs_btree_bload			libxfs_btree_bload
@@ -117,6 +124,7 @@
 
 #define xfs_finobt_calc_reserves	libxfs_finobt_calc_reserves
 #define xfs_free_extent			libxfs_free_extent
+#define xfs_free_extent_later		libxfs_free_extent_later
 #define xfs_free_perag			libxfs_free_perag
 #define xfs_fs_geometry			libxfs_fs_geometry
 #define xfs_highbit32			libxfs_highbit32
@@ -127,7 +135,10 @@
 #define xfs_ialloc_read_agi		libxfs_ialloc_read_agi
 #define xfs_idata_realloc		libxfs_idata_realloc
 #define xfs_idestroy_fork		libxfs_idestroy_fork
+#define xfs_iext_first			libxfs_iext_first
+#define xfs_iext_insert_raw		libxfs_iext_insert_raw
 #define xfs_iext_lookup_extent		libxfs_iext_lookup_extent
+#define xfs_iext_next			libxfs_iext_next
 #define xfs_ifork_zap_attr		libxfs_ifork_zap_attr
 #define xfs_imap_to_bp			libxfs_imap_to_bp
 #define xfs_initialize_perag		libxfs_initialize_perag
@@ -174,10 +185,12 @@
 #define xfs_rmapbt_stage_cursor		libxfs_rmapbt_stage_cursor
 #define xfs_rmap_compare		libxfs_rmap_compare
 #define xfs_rmap_get_rec		libxfs_rmap_get_rec
+#define xfs_rmap_ino_bmbt_owner		libxfs_rmap_ino_bmbt_owner
 #define xfs_rmap_irec_offset_pack	libxfs_rmap_irec_offset_pack
 #define xfs_rmap_irec_offset_unpack	libxfs_rmap_irec_offset_unpack
 #define xfs_rmap_lookup_le		libxfs_rmap_lookup_le
 #define xfs_rmap_lookup_le_range	libxfs_rmap_lookup_le_range
+#define xfs_rmap_query_all		libxfs_rmap_query_all
 #define xfs_rmap_query_range		libxfs_rmap_query_range
 
 #define xfs_rtbitmap_getword		libxfs_rtbitmap_getword
diff --git a/libxfs/trans.c b/libxfs/trans.c
index bd1186b24..8143a6a99 100644
--- a/libxfs/trans.c
+++ b/libxfs/trans.c
@@ -1143,3 +1143,51 @@ libxfs_trans_alloc_inode(
 	*tpp = tp;
 	return 0;
 }
+
+/*
+ * Try to reserve more blocks for a transaction.  The single use case we
+ * support is for offline repair -- use a transaction to gather data without
+ * fear of btree cycle deadlocks; calculate how many blocks we really need
+ * from that data; and only then start modifying data.  This can fail due to
+ * ENOSPC, so we have to be able to cancel the transaction.
+ */
+int
+libxfs_trans_reserve_more(
+	struct xfs_trans	*tp,
+	uint			blocks,
+	uint			rtextents)
+{
+	int			error = 0;
+
+	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
+
+	/*
+	 * Attempt to reserve the needed disk blocks by decrementing
+	 * the number needed from the number available.  This will
+	 * fail if the count would go below zero.
+	 */
+	if (blocks > 0) {
+		if (tp->t_mountp->m_sb.sb_fdblocks < blocks)
+			return -ENOSPC;
+		tp->t_blk_res += blocks;
+	}
+
+	/*
+	 * Attempt to reserve the needed realtime extents by decrementing
+	 * the number needed from the number available.  This will
+	 * fail if the count would go below zero.
+	 */
+	if (rtextents > 0) {
+		if (tp->t_mountp->m_sb.sb_rextents < rtextents) {
+			error = -ENOSPC;
+			goto out_blocks;
+		}
+	}
+
+	return 0;
+out_blocks:
+	if (blocks > 0)
+		tp->t_blk_res -= blocks;
+
+	return error;
+}
diff --git a/repair/Makefile b/repair/Makefile
index 2c40e59a3..e5014deb0 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -16,6 +16,7 @@ HFILES = \
 	avl.h \
 	bulkload.h \
 	bmap.h \
+	bmap_repair.h \
 	btree.h \
 	da_util.h \
 	dinode.h \
@@ -41,6 +42,7 @@ CFILES = \
 	avl.c \
 	bulkload.c \
 	bmap.c \
+	bmap_repair.c \
 	btree.c \
 	da_util.c \
 	dino_chunks.c \
diff --git a/repair/agbtree.c b/repair/agbtree.c
index c6f0512fe..38f3f7b8f 100644
--- a/repair/agbtree.c
+++ b/repair/agbtree.c
@@ -22,7 +22,7 @@ init_rebuild(
 {
 	memset(btr, 0, sizeof(struct bt_rebuild));
 
-	bulkload_init_ag(&btr->newbt, sc, oinfo);
+	bulkload_init_ag(&btr->newbt, sc, oinfo, NULLFSBLOCK);
 	btr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
 	bulkload_estimate_ag_slack(sc, &btr->bload, est_agfreeblocks);
 }
diff --git a/repair/bmap_repair.c b/repair/bmap_repair.c
new file mode 100644
index 000000000..1dbcafb22
--- /dev/null
+++ b/repair/bmap_repair.c
@@ -0,0 +1,748 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2019-2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include <libxfs.h>
+#include "btree.h"
+#include "err_protos.h"
+#include "libxlog.h"
+#include "incore.h"
+#include "globals.h"
+#include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
+#include "bulkload.h"
+#include "bmap_repair.h"
+#include "libfrog/util.h"
+
+/*
+ * Inode Fork Block Mapping (BMBT) Repair
+ * ======================================
+ *
+ * Gather all the rmap records for the inode and fork we're fixing, reset the
+ * incore fork, then recreate the btree.
+ */
+struct xrep_bmap {
+	/* List of new bmap records. */
+	struct xfs_slab		*bmap_records;
+	struct xfs_slab_cursor	*bmap_cursor;
+
+	/* New fork. */
+	struct bulkload		new_fork_info;
+	struct xfs_btree_bload	bmap_bload;
+
+	struct repair_ctx	*sc;
+
+	/* How many blocks did we find allocated to this file? */
+	xfs_rfsblock_t		nblocks;
+
+	/* How many bmbt blocks did we find for this fork? */
+	xfs_rfsblock_t		old_bmbt_block_count;
+
+	/* Which fork are we fixing? */
+	int			whichfork;
+};
+
+/* Remember this reverse-mapping as a series of bmap records. */
+STATIC int
+xrep_bmap_from_rmap(
+	struct xrep_bmap	*rb,
+	xfs_fileoff_t		startoff,
+	xfs_fsblock_t		startblock,
+	xfs_filblks_t		blockcount,
+	bool			unwritten)
+{
+	struct xfs_bmbt_rec	rbe;
+	struct xfs_bmbt_irec	irec;
+	int			error = 0;
+
+	irec.br_startoff = startoff;
+	irec.br_startblock = startblock;
+	irec.br_state = unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+
+	do {
+		xfs_failaddr_t	fa;
+
+		irec.br_blockcount = min_t(xfs_filblks_t, blockcount,
+				XFS_MAX_BMBT_EXTLEN);
+
+		fa = libxfs_bmap_validate_extent(rb->sc->ip, rb->whichfork,
+				&irec);
+		if (fa)
+			return -EFSCORRUPTED;
+
+		libxfs_bmbt_disk_set_all(&rbe, &irec);
+
+		error = slab_add(rb->bmap_records, &rbe);
+		if (error)
+			return error;
+
+		irec.br_startblock += irec.br_blockcount;
+		irec.br_startoff += irec.br_blockcount;
+		blockcount -= irec.br_blockcount;
+	} while (blockcount > 0);
+
+	return 0;
+}
+
+/* Check for any obvious errors or conflicts in the file mapping. */
+STATIC int
+xrep_bmap_check_fork_rmap(
+	struct xrep_bmap		*rb,
+	struct xfs_btree_cur		*cur,
+	const struct xfs_rmap_irec	*rec)
+{
+	struct repair_ctx		*sc = rb->sc;
+
+	/*
+	 * Data extents for rt files are never stored on the data device, but
+	 * everything else (xattrs, bmbt blocks) can be.
+	 */
+	if (XFS_IS_REALTIME_INODE(sc->ip) &&
+	    !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
+		return EFSCORRUPTED;
+
+	/* Check that this is within the AG. */
+	if (!xfs_verify_agbext(cur->bc_ag.pag, rec->rm_startblock,
+				rec->rm_blockcount))
+		return EFSCORRUPTED;
+
+	/* No contradictory flags. */
+	if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
+	    (rec->rm_flags & XFS_RMAP_UNWRITTEN))
+		return EFSCORRUPTED;
+
+	/* Check the file offset range. */
+	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
+	    !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
+		return EFSCORRUPTED;
+
+	return 0;
+}
+
+/* Record extents that belong to this inode's fork. */
+STATIC int
+xrep_bmap_walk_rmap(
+	struct xfs_btree_cur		*cur,
+	const struct xfs_rmap_irec	*rec,
+	void				*priv)
+{
+	struct xrep_bmap		*rb = priv;
+	struct xfs_mount		*mp = cur->bc_mp;
+	xfs_fsblock_t			fsbno;
+	int				error;
+
+	/* Skip extents which are not owned by this inode and fork. */
+	if (rec->rm_owner != rb->sc->ip->i_ino)
+		return 0;
+
+	error = xrep_bmap_check_fork_rmap(rb, cur, rec);
+	if (error)
+		return error;
+
+	/*
+	 * Record all blocks allocated to this file even if the extent isn't
+	 * for the fork we're rebuilding so that we can reset di_nblocks later.
+	 */
+	rb->nblocks += rec->rm_blockcount;
+
+	/* If this rmap isn't for the fork we want, we're done. */
+	if (rb->whichfork == XFS_DATA_FORK &&
+	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
+		return 0;
+	if (rb->whichfork == XFS_ATTR_FORK &&
+	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
+		return 0;
+
+	fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
+			rec->rm_startblock);
+
+	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
+		rb->old_bmbt_block_count += rec->rm_blockcount;
+		return 0;
+	}
+
+	return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno,
+			rec->rm_blockcount,
+			rec->rm_flags & XFS_RMAP_UNWRITTEN);
+}
+
+/* Compare two bmap extents. */
+static int
+xrep_bmap_extent_cmp(
+	const void			*a,
+	const void			*b)
+{
+	xfs_fileoff_t			ao;
+	xfs_fileoff_t			bo;
+
+	ao = libxfs_bmbt_disk_get_startoff((struct xfs_bmbt_rec *)a);
+	bo = libxfs_bmbt_disk_get_startoff((struct xfs_bmbt_rec *)b);
+
+	if (ao > bo)
+		return 1;
+	else if (ao < bo)
+		return -1;
+	return 0;
+}
+
+/* Scan one AG for reverse mappings that we can turn into extent maps. */
+STATIC int
+xrep_bmap_scan_ag(
+	struct xrep_bmap	*rb,
+	struct xfs_perag	*pag)
+{
+	struct repair_ctx	*sc = rb->sc;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_buf		*agf_bp = NULL;
+	struct xfs_btree_cur	*cur;
+	int			error;
+
+	error = -libxfs_alloc_read_agf(pag, sc->tp, 0, &agf_bp);
+	if (error)
+		return error;
+	if (!agf_bp)
+		return ENOMEM;
+	cur = libxfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, pag);
+	error = -libxfs_rmap_query_all(cur, xrep_bmap_walk_rmap, rb);
+	libxfs_btree_del_cursor(cur, error);
+	libxfs_trans_brelse(sc->tp, agf_bp);
+	return error;
+}
+
+/*
+ * Collect block mappings for this fork of this inode and decide if we have
+ * enough space to rebuild.  Caller is responsible for cleaning up the list if
+ * anything goes wrong.
+ */
+STATIC int
+xrep_bmap_find_mappings(
+	struct xrep_bmap	*rb)
+{
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		agno;
+	int			error;
+
+	/* Iterate the rmaps for extents. */
+	for_each_perag(rb->sc->mp, agno, pag) {
+		error = xrep_bmap_scan_ag(rb, pag);
+		if (error) {
+			libxfs_perag_put(pag);
+			return error;
+		}
+	}
+
+	return 0;
+}
+
+/* Retrieve bmap data for bulk load. */
+STATIC int
+xrep_bmap_get_records(
+	struct xfs_btree_cur	*cur,
+	unsigned int		idx,
+	struct xfs_btree_block	*block,
+	unsigned int		nr_wanted,
+	void			*priv)
+{
+	struct xfs_bmbt_rec	*rec;
+	struct xfs_bmbt_irec	*irec = &cur->bc_rec.b;
+	struct xrep_bmap	*rb = priv;
+	union xfs_btree_rec	*block_rec;
+	unsigned int		loaded;
+
+	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
+		rec = pop_slab_cursor(rb->bmap_cursor);
+		libxfs_bmbt_disk_get_all(rec, irec);
+
+		block_rec = libxfs_btree_rec_addr(cur, idx, block);
+		cur->bc_ops->init_rec_from_cur(cur, block_rec);
+	}
+
+	return loaded;
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+STATIC int
+xrep_bmap_claim_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	void			*priv)
+{
+	struct xrep_bmap        *rb = priv;
+
+	return bulkload_claim_block(cur, &rb->new_fork_info, ptr);
+}
+
+/* Figure out how much space we need to create the incore btree root block. */
+STATIC size_t
+xrep_bmap_iroot_size(
+	struct xfs_btree_cur	*cur,
+	unsigned int		level,
+	unsigned int		nr_this_level,
+	void			*priv)
+{
+	ASSERT(level > 0);
+
+	return XFS_BMAP_BROOT_SPACE_CALC(cur->bc_mp, nr_this_level);
+}
+
+/* Update the inode counters. */
+STATIC int
+xrep_bmap_reset_counters(
+	struct xrep_bmap	*rb)
+{
+	struct repair_ctx	*sc = rb->sc;
+	struct xbtree_ifakeroot	*ifake = &rb->new_fork_info.ifake;
+	int64_t			delta;
+
+	/*
+	 * Update the inode block counts to reflect the extents we found in the
+	 * rmapbt.
+	 */
+	delta = ifake->if_blocks - rb->old_bmbt_block_count;
+	sc->ip->i_nblocks = rb->nblocks + delta;
+	libxfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+
+	/* Quotas don't exist so we're done. */
+	return 0;
+}
+
+/*
+ * Ensure that the inode being repaired is ready to handle a certain number of
+ * extents, or return EFSCORRUPTED.  Caller must hold the ILOCK of the inode
+ * being repaired and have joined it to the scrub transaction.
+ */
+static int
+xrep_ino_ensure_extent_count(
+	struct repair_ctx	*sc,
+	int			whichfork,
+	xfs_extnum_t		nextents)
+{
+	xfs_extnum_t		max_extents;
+	bool			large_extcount;
+
+	large_extcount = xfs_inode_has_large_extent_counts(sc->ip);
+	max_extents = xfs_iext_max_nextents(large_extcount, whichfork);
+	if (nextents <= max_extents)
+		return 0;
+	if (large_extcount)
+		return EFSCORRUPTED;
+	if (!xfs_has_large_extent_counts(sc->mp))
+		return EFSCORRUPTED;
+
+	max_extents = xfs_iext_max_nextents(true, whichfork);
+	if (nextents > max_extents)
+		return EFSCORRUPTED;
+
+	sc->ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
+	libxfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+	return 0;
+}
+
+/*
+ * Create a new iext tree and load it with block mappings.  If the inode is
+ * in extents format, that's all we need to do to commit the new mappings.
+ * If it is in btree format, this takes care of preloading the incore tree.
+ */
+STATIC int
+xrep_bmap_extents_load(
+	struct xrep_bmap	*rb,
+	struct xfs_btree_cur	*bmap_cur,
+	uint64_t		nextents)
+{
+	struct xfs_iext_cursor	icur;
+	struct xbtree_ifakeroot	*ifake = &rb->new_fork_info.ifake;
+	struct xfs_ifork	*ifp = ifake->if_fork;
+	unsigned int		i;
+	int			error;
+
+	ASSERT(ifp->if_bytes == 0);
+
+	error = init_slab_cursor(rb->bmap_records, xrep_bmap_extent_cmp,
+			&rb->bmap_cursor);
+	if (error)
+		return error;
+
+	/* Add all the mappings to the incore extent tree. */
+	libxfs_iext_first(ifp, &icur);
+	for (i = 0; i < nextents; i++) {
+		struct xfs_bmbt_rec	*rec;
+
+		rec = pop_slab_cursor(rb->bmap_cursor);
+		libxfs_bmbt_disk_get_all(rec, &bmap_cur->bc_rec.b);
+		libxfs_iext_insert_raw(ifp, &icur, &bmap_cur->bc_rec.b);
+		ifp->if_nextents++;
+		libxfs_iext_next(ifp, &icur);
+	}
+	free_slab_cursor(&rb->bmap_cursor);
+
+	return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
+			ifp->if_nextents);
+}
+
+/*
+ * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
+ * and load the incore extent tree.
+ */
+STATIC int
+xrep_bmap_btree_load(
+	struct xrep_bmap	*rb,
+	struct xfs_btree_cur	*bmap_cur,
+	uint64_t		nextents)
+{
+	struct repair_ctx	*sc = rb->sc;
+	int			error;
+
+	rb->bmap_bload.get_records = xrep_bmap_get_records;
+	rb->bmap_bload.claim_block = xrep_bmap_claim_block;
+	rb->bmap_bload.iroot_size = xrep_bmap_iroot_size;
+	rb->bmap_bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
+
+	/*
+	 * Always make the btree as small as possible, since we might need the
+	 * space to rebuild the space metadata btrees in later phases.
+	 */
+	rb->bmap_bload.leaf_slack = 0;
+	rb->bmap_bload.node_slack = 0;
+
+	/* Compute how many blocks we'll need. */
+	error = -libxfs_btree_bload_compute_geometry(bmap_cur, &rb->bmap_bload,
+			nextents);
+	if (error)
+		return error;
+
+	/*
+	 * Guess how many blocks we're going to need to rebuild an entire bmap
+	 * from the number of extents we found, and pump up our transaction to
+	 * have sufficient block reservation.
+	 */
+	error = -libxfs_trans_reserve_more(sc->tp, rb->bmap_bload.nr_blocks, 0);
+	if (error)
+		return error;
+
+	/* Reserve the space we'll need for the new btree. */
+	error = bulkload_alloc_file_blocks(&rb->new_fork_info,
+			rb->bmap_bload.nr_blocks);
+	if (error)
+		return error;
+
+	/* Add all observed bmap records. */
+	error = init_slab_cursor(rb->bmap_records, xrep_bmap_extent_cmp,
+			&rb->bmap_cursor);
+	if (error)
+		return error;
+	error = -libxfs_btree_bload(bmap_cur, &rb->bmap_bload, rb);
+	free_slab_cursor(&rb->bmap_cursor);
+	if (error)
+	       return error;
+
+	/*
+	 * Load the new bmap records into the new incore extent tree to
+	 * preserve delalloc reservations for regular files.  The directory
+	 * code loads the extent tree during xfs_dir_open and assumes
+	 * thereafter that it remains loaded, so we must not violate that
+	 * assumption.
+	 */
+	return xrep_bmap_extents_load(rb, bmap_cur, nextents);
+}
+
+/*
+ * Use the collected bmap information to stage a new bmap fork.  If this is
+ * successful we'll return with the new fork information logged to the repair
+ * transaction but not yet committed.
+ */
+STATIC int
+xrep_bmap_build_new_fork(
+	struct xrep_bmap	*rb)
+{
+	struct xfs_owner_info	oinfo;
+	struct repair_ctx	*sc = rb->sc;
+	struct xfs_btree_cur	*bmap_cur;
+	struct xbtree_ifakeroot	*ifake = &rb->new_fork_info.ifake;
+	uint64_t		nextents;
+	int			error;
+
+	/*
+	 * Sort the bmap extents by startblock to avoid btree splits when we
+	 * rebuild the bmbt btree.
+	 */
+	qsort_slab(rb->bmap_records, xrep_bmap_extent_cmp);
+
+	/*
+	 * Prepare to construct the new fork by initializing the new btree
+	 * structure and creating a fake ifork in the ifakeroot structure.
+	 */
+	libxfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
+	bulkload_init_inode(&rb->new_fork_info, sc, rb->whichfork, &oinfo);
+	bmap_cur = libxfs_bmbt_stage_cursor(sc->mp, sc->ip, ifake);
+
+	/*
+	 * Figure out the size and format of the new fork, then fill it with
+	 * all the bmap records we've found.  Join the inode to the transaction
+	 * so that we can roll the transaction while holding the inode locked.
+	 */
+	libxfs_trans_ijoin(sc->tp, sc->ip, 0);
+	nextents = slab_count(rb->bmap_records);
+	if (nextents <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
+		ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
+		error = xrep_bmap_extents_load(rb, bmap_cur, nextents);
+	} else {
+		ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
+		error = xrep_bmap_btree_load(rb, bmap_cur, nextents);
+	}
+	if (error)
+		goto err_cur;
+
+	/*
+	 * Install the new fork in the inode.  After this point the old mapping
+	 * data are no longer accessible and the new tree is live.  We delete
+	 * the cursor immediately after committing the staged root because the
+	 * staged fork might be in extents format.
+	 */
+	libxfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
+	libxfs_btree_del_cursor(bmap_cur, 0);
+
+	/* Reset the inode counters now that we've changed the fork. */
+	error = xrep_bmap_reset_counters(rb);
+	if (error)
+		goto err_newbt;
+
+	/* Dispose of any unused blocks and the accounting infomation. */
+	error = bulkload_commit(&rb->new_fork_info);
+	if (error)
+		return error;
+
+	return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
+err_cur:
+	if (bmap_cur)
+		libxfs_btree_del_cursor(bmap_cur, error);
+err_newbt:
+	bulkload_cancel(&rb->new_fork_info);
+	return error;
+}
+
+/* Check for garbage inputs.  Returns ECANCELED if there's nothing to do. */
+STATIC int
+xrep_bmap_check_inputs(
+	struct repair_ctx	*sc,
+	int			whichfork)
+{
+	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);
+
+	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
+
+	if (!xfs_has_rmapbt(sc->mp))
+		return EOPNOTSUPP;
+
+	/* No fork means nothing to rebuild. */
+	if (!ifp)
+		return ECANCELED;
+
+	/*
+	 * We only know how to repair extent mappings, which is to say that we
+	 * only support extents and btree fork format.  Repairs to a local
+	 * format fork require a higher level repair function, so we do not
+	 * have any work to do here.
+	 */
+	switch (ifp->if_format) {
+	case XFS_DINODE_FMT_DEV:
+	case XFS_DINODE_FMT_LOCAL:
+	case XFS_DINODE_FMT_UUID:
+		return ECANCELED;
+	case XFS_DINODE_FMT_EXTENTS:
+	case XFS_DINODE_FMT_BTREE:
+		break;
+	default:
+		return EFSCORRUPTED;
+	}
+
+	if (whichfork == XFS_ATTR_FORK)
+		return 0;
+
+	/* Only files, symlinks, and directories get to have data forks. */
+	switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
+	case S_IFREG:
+	case S_IFDIR:
+	case S_IFLNK:
+		/* ok */
+		break;
+	default:
+		return EINVAL;
+	}
+
+	/* Don't know how to rebuild realtime data forks. */
+	if (XFS_IS_REALTIME_INODE(sc->ip))
+		return EOPNOTSUPP;
+
+	return 0;
+}
+
+/* Repair an inode fork. */
+STATIC int
+xrep_bmap(
+	struct repair_ctx	*sc,
+	int			whichfork)
+{
+	struct xrep_bmap	*rb;
+	int			error = 0;
+
+	error = xrep_bmap_check_inputs(sc, whichfork);
+	if (error == ECANCELED)
+		return 0;
+	if (error)
+		return error;
+
+	rb = kmem_zalloc(sizeof(struct xrep_bmap), KM_NOFS | KM_MAYFAIL);
+	if (!rb)
+		return ENOMEM;
+	rb->sc = sc;
+	rb->whichfork = whichfork;
+
+	/* Set up some storage */
+	error = init_slab(&rb->bmap_records, sizeof(struct xfs_bmbt_rec));
+	if (error)
+		goto out_rb;
+
+	/* Collect all reverse mappings for this fork's extents. */
+	error = xrep_bmap_find_mappings(rb);
+	if (error)
+		goto out_bitmap;
+
+	/* Rebuild the bmap information. */
+	error = xrep_bmap_build_new_fork(rb);
+
+	/*
+	 * We don't need to free the old bmbt blocks because we're rebuilding
+	 * all the space metadata later.
+	 */
+
+out_bitmap:
+	free_slab(&rb->bmap_records);
+out_rb:
+	kmem_free(rb);
+	return error;
+}
+
+/* Rebuild some inode's bmap. */
+int
+rebuild_bmap(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino,
+	int			whichfork,
+	unsigned long		nr_extents,
+	struct xfs_buf		**ino_bpp,
+	struct xfs_dinode	**dinop,
+	int			*dirty)
+{
+	struct repair_ctx	sc = {
+		.mp		= mp,
+	};
+	const struct xfs_buf_ops *bp_ops;
+	unsigned long		boffset;
+	unsigned long long	resblks;
+	xfs_daddr_t		bp_bn;
+	int			bp_length;
+	int			error, err2;
+
+	bp_bn = xfs_buf_daddr(*ino_bpp);
+	bp_length = (*ino_bpp)->b_length;
+	bp_ops = (*ino_bpp)->b_ops;
+	boffset = (char *)(*dinop) - (char *)(*ino_bpp)->b_addr;
+
+	/*
+	 * Bail out if the inode didn't think it had extents.  Otherwise, zap
+	 * it back to a zero-extents fork so that we can rebuild it.
+	 */
+	switch (whichfork) {
+	case XFS_DATA_FORK:
+		if ((*dinop)->di_nextents == 0)
+			return 0;
+		(*dinop)->di_format = XFS_DINODE_FMT_EXTENTS;
+		(*dinop)->di_nextents = 0;
+		libxfs_dinode_calc_crc(mp, *dinop);
+		*dirty = 1;
+		break;
+	case XFS_ATTR_FORK:
+		if ((*dinop)->di_anextents == 0)
+			return 0;
+		(*dinop)->di_aformat = XFS_DINODE_FMT_EXTENTS;
+		(*dinop)->di_anextents = 0;
+		libxfs_dinode_calc_crc(mp, *dinop);
+		*dirty = 1;
+		break;
+	default:
+		return EINVAL;
+	}
+
+	resblks = libxfs_bmbt_calc_size(mp, nr_extents);
+	error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, 0,
+			0, &sc.tp);
+	if (error)
+		return error;
+
+	/*
+	 * Repair magic: the caller passed us the inode cluster buffer for the
+	 * inode.  The _iget call grabs the buffer to load the incore inode, so
+	 * the buffer must be attached to the transaction to avoid recursing
+	 * the buffer lock.
+	 *
+	 * Unfortunately, the _iget call drops the buffer once the inode is
+	 * loaded, so if we've made any changes we have to log the buffer, hold
+	 * it, and roll the transaction.  This persists the caller's changes
+	 * and maintains our ownership of the cluster buffer.
+	 */
+	libxfs_trans_bjoin(sc.tp, *ino_bpp);
+	if (*dirty) {
+		unsigned int	end = BBTOB((*ino_bpp)->b_length) - 1;
+
+		libxfs_trans_log_buf(sc.tp, *ino_bpp, 0, end);
+		*dirty = 0;
+
+		libxfs_trans_bhold(sc.tp, *ino_bpp);
+		error = -libxfs_trans_roll(&sc.tp);
+		libxfs_trans_bjoin(sc.tp, *ino_bpp);
+		if (error)
+			goto out_cancel;
+	}
+
+	/* Grab the inode and fix the bmbt. */
+	error = -libxfs_iget(mp, sc.tp, ino, 0, &sc.ip);
+	if (error)
+		goto out_cancel;
+	error = xrep_bmap(&sc, whichfork);
+	if (error)
+		libxfs_trans_cancel(sc.tp);
+	else
+		error = -libxfs_trans_commit(sc.tp);
+
+	/*
+	 * Rebuilding the inode fork rolled the transaction, so we need to
+	 * re-grab the inode cluster buffer and dinode pointer for the caller.
+	 */
+	err2 = -libxfs_imap_to_bp(mp, NULL, &sc.ip->i_imap, ino_bpp);
+	if (err2)
+		do_error(
+ _("Unable to re-grab inode cluster buffer after failed repair of inode %llu, error %d.\n"),
+				(unsigned long long)ino, err2);
+	*dinop = xfs_buf_offset(*ino_bpp, sc.ip->i_imap.im_boffset);
+	libxfs_irele(sc.ip);
+
+	return error;
+
+out_cancel:
+	libxfs_trans_cancel(sc.tp);
+
+	/*
+	 * Try to regrab the old buffer so we have something to return to the
+	 * caller.
+	 */
+	err2 = -libxfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, bp_bn,
+			bp_length, 0, ino_bpp, bp_ops);
+	if (err2)
+		do_error(
+ _("Unable to re-grab inode cluster buffer after failed repair of inode %llu, error %d.\n"),
+				(unsigned long long)ino, err2);
+	*dinop = xfs_buf_offset(*ino_bpp, boffset);
+	return error;
+}
diff --git a/repair/bmap_repair.h b/repair/bmap_repair.h
new file mode 100644
index 000000000..6d5535949
--- /dev/null
+++ b/repair/bmap_repair.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2019-2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef REBUILD_H_
+#define REBUILD_H_
+
+int rebuild_bmap(struct xfs_mount *mp, xfs_ino_t ino, int whichfork,
+		 unsigned long nr_extents, struct xfs_buf **ino_bpp,
+		 struct xfs_dinode **dinop, int *dirty);
+
+#endif /* REBUILD_H_ */
diff --git a/repair/bulkload.c b/repair/bulkload.c
index 18158c397..a97839f54 100644
--- a/repair/bulkload.c
+++ b/repair/bulkload.c
@@ -14,14 +14,29 @@ void
 bulkload_init_ag(
 	struct bulkload			*bkl,
 	struct repair_ctx		*sc,
-	const struct xfs_owner_info	*oinfo)
+	const struct xfs_owner_info	*oinfo,
+	xfs_fsblock_t			alloc_hint)
 {
 	memset(bkl, 0, sizeof(struct bulkload));
 	bkl->sc = sc;
 	bkl->oinfo = *oinfo; /* structure copy */
+	bkl->alloc_hint = alloc_hint;
 	INIT_LIST_HEAD(&bkl->resv_list);
 }
 
+/* Initialize accounting resources for staging a new inode fork btree. */
+void
+bulkload_init_inode(
+	struct bulkload			*bkl,
+	struct repair_ctx		*sc,
+	int				whichfork,
+	const struct xfs_owner_info	*oinfo)
+{
+	bulkload_init_ag(bkl, sc, oinfo, XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino));
+	bkl->ifake.if_fork = kmem_cache_zalloc(xfs_ifork_cache, 0);
+	bkl->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, whichfork);
+}
+
 /* Designate specific blocks to be used to build our new btree. */
 static int
 bulkload_add_blocks(
@@ -71,17 +86,199 @@ bulkload_add_extent(
 	return bulkload_add_blocks(bkl, pag, &args);
 }
 
+/* Don't let our allocation hint take us beyond EOFS */
+static inline void
+bulkload_validate_file_alloc_hint(
+	struct bulkload		*bkl)
+{
+	struct repair_ctx	*sc = bkl->sc;
+
+	if (libxfs_verify_fsbno(sc->mp, bkl->alloc_hint))
+		return;
+
+	bkl->alloc_hint = XFS_AGB_TO_FSB(sc->mp, 0, XFS_AGFL_BLOCK(sc->mp) + 1);
+}
+
+/* Allocate disk space for our new file-based btree. */
+int
+bulkload_alloc_file_blocks(
+	struct bulkload		*bkl,
+	uint64_t		nr_blocks)
+{
+	struct repair_ctx	*sc = bkl->sc;
+	struct xfs_mount	*mp = sc->mp;
+	int			error = 0;
+
+	while (nr_blocks > 0) {
+		struct xfs_alloc_arg	args = {
+			.tp		= sc->tp,
+			.mp		= mp,
+			.oinfo		= bkl->oinfo,
+			.minlen		= 1,
+			.maxlen		= nr_blocks,
+			.prod		= 1,
+			.resv		= XFS_AG_RESV_NONE,
+		};
+		struct xfs_perag	*pag;
+		xfs_agnumber_t		agno;
+
+		bulkload_validate_file_alloc_hint(bkl);
+
+		error = -libxfs_alloc_vextent_start_ag(&args, bkl->alloc_hint);
+		if (error)
+			return error;
+		if (args.fsbno == NULLFSBLOCK)
+			return ENOSPC;
+
+		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);
+
+		pag = libxfs_perag_get(mp, agno);
+		if (!pag) {
+			ASSERT(0);
+			return -EFSCORRUPTED;
+		}
+
+		error = bulkload_add_blocks(bkl, pag, &args);
+		libxfs_perag_put(pag);
+		if (error)
+			return error;
+
+		nr_blocks -= args.len;
+		bkl->alloc_hint = args.fsbno + args.len;
+
+		error = -libxfs_defer_finish(&sc->tp);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Free the unused part of a space extent that was reserved for a new ondisk
+ * structure.  Returns the number of EFIs logged or a negative errno.
+ */
+static inline int
+bulkload_free_extent(
+	struct bulkload		*bkl,
+	struct bulkload_resv	*resv,
+	bool			btree_committed)
+{
+	struct repair_ctx	*sc = bkl->sc;
+	xfs_agblock_t		free_agbno = resv->agbno;
+	xfs_extlen_t		free_aglen = resv->len;
+	xfs_fsblock_t		fsbno;
+	int			error;
+
+	if (!btree_committed || resv->used == 0) {
+		/*
+		 * If we're not committing a new btree or we didn't use the
+		 * space reservation, free the entire space extent.
+		 */
+		goto free;
+	}
+
+	/*
+	 * We used space and committed the btree.  Remove the written blocks
+	 * from the reservation and possibly log a new EFI to free any unused
+	 * reservation space.
+	 */
+	free_agbno += resv->used;
+	free_aglen -= resv->used;
+
+	if (free_aglen == 0)
+		return 0;
+
+free:
+	/*
+	 * Use EFIs to free the reservations.  We don't need to use EFIs here
+	 * like the kernel, but we'll do it to keep the code matched.
+	 */
+	fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
+	error = -libxfs_free_extent_later(sc->tp, fsbno, free_aglen,
+			&bkl->oinfo, XFS_AG_RESV_NONE, true);
+	if (error)
+		return error;
+
+	return 1;
+}
+
 /* Free all the accounting info and disk space we reserved for a new btree. */
-void
-bulkload_commit(
-	struct bulkload		*bkl)
+static int
+bulkload_free(
+	struct bulkload		*bkl,
+	bool			btree_committed)
 {
+	struct repair_ctx	*sc = bkl->sc;
 	struct bulkload_resv	*resv, *n;
+	unsigned int		freed = 0;
+	int			error = 0;
 
 	list_for_each_entry_safe(resv, n, &bkl->resv_list, list) {
+		int		ret;
+
+		ret = bulkload_free_extent(bkl, resv, btree_committed);
 		list_del(&resv->list);
+		libxfs_perag_put(resv->pag);
 		kfree(resv);
+
+		if (ret < 0) {
+			error = ret;
+			goto junkit;
+		}
+
+		freed += ret;
+		if (freed >= XREP_MAX_ITRUNCATE_EFIS) {
+			error = -libxfs_defer_finish(&sc->tp);
+			if (error)
+				goto junkit;
+			freed = 0;
+		}
 	}
+
+	if (freed)
+		error = -libxfs_defer_finish(&sc->tp);
+junkit:
+	/*
+	 * If we still have reservations attached to @newbt, cleanup must have
+	 * failed and the filesystem is about to go down.  Clean up the incore
+	 * reservations.
+	 */
+	list_for_each_entry_safe(resv, n, &bkl->resv_list, list) {
+		list_del(&resv->list);
+		libxfs_perag_put(resv->pag);
+		kfree(resv);
+	}
+
+	if (sc->ip) {
+		kmem_cache_free(xfs_ifork_cache, bkl->ifake.if_fork);
+		bkl->ifake.if_fork = NULL;
+	}
+
+	return error;
+}
+
+/*
+ * Free all the accounting info and unused disk space allocations after
+ * committing a new btree.
+ */
+int
+bulkload_commit(
+	struct bulkload		*bkl)
+{
+	return bulkload_free(bkl, true);
+}
+
+/*
+ * Free all the accounting info and all of the disk space we reserved for a new
+ * btree that we're not going to commit.  We want to try to roll things back
+ * cleanly for things like ENOSPC midway through allocation.
+ */
+void
+bulkload_cancel(
+	struct bulkload		*bkl)
+{
+	bulkload_free(bkl, false);
 }
 
 /* Feed one of the reserved btree blocks to the bulk loader. */
diff --git a/repair/bulkload.h b/repair/bulkload.h
index f4790e3b3..a88aafaa6 100644
--- a/repair/bulkload.h
+++ b/repair/bulkload.h
@@ -8,9 +8,17 @@
 
 extern int bload_leaf_slack;
 extern int bload_node_slack;
+/*
+ * This is the maximum number of deferred extent freeing item extents (EFIs)
+ * that we'll attach to a transaction without rolling the transaction to avoid
+ * overrunning a tr_itruncate reservation.
+ */
+#define XREP_MAX_ITRUNCATE_EFIS	(128)
 
 struct repair_ctx {
 	struct xfs_mount	*mp;
+	struct xfs_inode	*ip;
+	struct xfs_trans	*tp;
 };
 
 struct bulkload_resv {
@@ -36,7 +44,10 @@ struct bulkload {
 	struct list_head	resv_list;
 
 	/* Fake root for new btree. */
-	struct xbtree_afakeroot	afake;
+	union {
+		struct xbtree_afakeroot	afake;
+		struct xbtree_ifakeroot	ifake;
+	};
 
 	/* rmap owner of these blocks */
 	struct xfs_owner_info	oinfo;
@@ -44,6 +55,9 @@ struct bulkload {
 	/* The last reservation we allocated from. */
 	struct bulkload_resv	*last_resv;
 
+	/* Hint as to where we should allocate blocks. */
+	xfs_fsblock_t		alloc_hint;
+
 	/* Number of blocks reserved via resv_list. */
 	unsigned int		nr_reserved;
 };
@@ -52,12 +66,16 @@ struct bulkload {
 	list_for_each_entry_safe((resv), (n), &(bkl)->resv_list, list)
 
 void bulkload_init_ag(struct bulkload *bkl, struct repair_ctx *sc,
-		const struct xfs_owner_info *oinfo);
+		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint);
+void bulkload_init_inode(struct bulkload *bkl, struct repair_ctx *sc,
+		int whichfork, const struct xfs_owner_info *oinfo);
 int bulkload_claim_block(struct xfs_btree_cur *cur, struct bulkload *bkl,
 		union xfs_btree_ptr *ptr);
 int bulkload_add_extent(struct bulkload *bkl, struct xfs_perag *pag,
 		xfs_agblock_t agbno, xfs_extlen_t len);
-void bulkload_commit(struct bulkload *bkl);
+int bulkload_alloc_file_blocks(struct bulkload *bkl, uint64_t nr_blocks);
+void bulkload_cancel(struct bulkload *bkl);
+int bulkload_commit(struct bulkload *bkl);
 void bulkload_estimate_ag_slack(struct repair_ctx *sc,
 		struct xfs_btree_bload *bload, unsigned int free);
 
diff --git a/repair/dinode.c b/repair/dinode.c
index a18af3ff7..b8f5bf4e5 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -20,6 +20,7 @@
 #include "threads.h"
 #include "slab.h"
 #include "rmap.h"
+#include "bmap_repair.h"
 
 /*
  * gettext lookups for translations of strings use mutexes internally to
@@ -1909,7 +1910,9 @@ process_inode_data_fork(
 	xfs_ino_t		lino = XFS_AGINO_TO_INO(mp, agno, ino);
 	int			err = 0;
 	xfs_extnum_t		nex, max_nex;
+	int			try_rebuild = -1; /* don't know yet */
 
+retry:
 	/*
 	 * extent count on disk is only valid for positive values. The kernel
 	 * uses negative values in memory. hence if we see negative numbers
@@ -1938,11 +1941,15 @@ process_inode_data_fork(
 		*totblocks = 0;
 		break;
 	case XFS_DINODE_FMT_EXTENTS:
+		if (!rmapbt_suspect && try_rebuild == -1)
+			try_rebuild = 1;
 		err = process_exinode(mp, agno, ino, dino, type, dirty,
 			totblocks, nextents, dblkmap, XFS_DATA_FORK,
 			check_dups);
 		break;
 	case XFS_DINODE_FMT_BTREE:
+		if (!rmapbt_suspect && try_rebuild == -1)
+			try_rebuild = 1;
 		err = process_btinode(mp, agno, ino, dino, type, dirty,
 			totblocks, nextents, dblkmap, XFS_DATA_FORK,
 			check_dups);
@@ -1958,8 +1965,28 @@ process_inode_data_fork(
 	if (err)  {
 		do_warn(_("bad data fork in inode %" PRIu64 "\n"), lino);
 		if (!no_modify)  {
+			if (try_rebuild == 1) {
+				do_warn(
+_("rebuilding inode %"PRIu64" data fork\n"),
+					lino);
+				try_rebuild = 0;
+				err = rebuild_bmap(mp, lino, XFS_DATA_FORK,
+						be32_to_cpu(dino->di_nextents),
+						ino_bpp, dinop, dirty);
+				dino = *dinop;
+				if (!err)
+					goto retry;
+				do_warn(
+_("inode %"PRIu64" data fork rebuild failed, error %d, clearing\n"),
+					lino, err);
+			}
 			clear_dinode(mp, dino, lino);
 			*dirty += 1;
+			ASSERT(*dirty > 0);
+		} else if (try_rebuild == 1) {
+			do_warn(
+_("would have tried to rebuild inode %"PRIu64" data fork\n"),
+					lino);
 		}
 		return 1;
 	}
@@ -2025,7 +2052,9 @@ process_inode_attr_fork(
 	struct blkmap		*ablkmap = NULL;
 	int			repair = 0;
 	int			err;
+	int			try_rebuild = -1; /* don't know yet */
 
+retry:
 	if (!dino->di_forkoff) {
 		*anextents = 0;
 		if (dino->di_aformat != XFS_DINODE_FMT_EXTENTS) {
@@ -2052,6 +2081,8 @@ process_inode_attr_fork(
 		err = process_lclinode(mp, agno, ino, dino, XFS_ATTR_FORK);
 		break;
 	case XFS_DINODE_FMT_EXTENTS:
+		if (!rmapbt_suspect && try_rebuild == -1)
+			try_rebuild = 1;
 		ablkmap = blkmap_alloc(*anextents, XFS_ATTR_FORK);
 		*anextents = 0;
 		err = process_exinode(mp, agno, ino, dino, type, dirty,
@@ -2059,6 +2090,8 @@ process_inode_attr_fork(
 				XFS_ATTR_FORK, check_dups);
 		break;
 	case XFS_DINODE_FMT_BTREE:
+		if (!rmapbt_suspect && try_rebuild == -1)
+			try_rebuild = 1;
 		ablkmap = blkmap_alloc(*anextents, XFS_ATTR_FORK);
 		*anextents = 0;
 		err = process_btinode(mp, agno, ino, dino, type, dirty,
@@ -2084,10 +2117,29 @@ process_inode_attr_fork(
 		do_warn(_("bad attribute fork in inode %" PRIu64 "\n"), lino);
 
 		if (!no_modify)  {
+			if (try_rebuild == 1) {
+				do_warn(
+_("rebuilding inode %"PRIu64" attr fork\n"),
+					lino);
+				try_rebuild = 0;
+				err = rebuild_bmap(mp, lino, XFS_ATTR_FORK,
+						be16_to_cpu(dino->di_anextents),
+						ino_bpp, dinop, dirty);
+				dino = *dinop;
+				if (!err)
+					goto retry;
+				do_warn(
+_("inode %"PRIu64" attr fork rebuild failed, error %d"),
+					lino, err);
+			}
 			do_warn(_(", clearing attr fork\n"));
 			*dirty += clear_dinode_attr(mp, dino, lino);
 			ASSERT(*dirty > 0);
-		} else  {
+		} else if (try_rebuild) {
+			do_warn(
+_("would have tried to rebuild inode %"PRIu64" attr fork or cleared it\n"),
+					lino);
+		} else {
 			do_warn(_(", would clear attr fork\n"));
 		}
 
diff --git a/repair/rmap.c b/repair/rmap.c
index 6bb77e082..a2291c7b3 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -33,7 +33,7 @@ struct xfs_ag_rmap {
 };
 
 static struct xfs_ag_rmap *ag_rmaps;
-static bool rmapbt_suspect;
+bool rmapbt_suspect;
 static bool refcbt_suspect;
 
 static inline int rmap_compare(const void *a, const void *b)
diff --git a/repair/rmap.h b/repair/rmap.h
index 6004e9f68..1dad2f589 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -7,6 +7,7 @@
 #define RMAP_H_
 
 extern bool collect_rmaps;
+extern bool rmapbt_suspect;
 
 extern bool rmap_needs_work(struct xfs_mount *);

next prev parent reply	other threads:[~2024-04-17 21:44 UTC|newest]

Thread overview: 130+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-17 21:11 [PATCHBOMB] xfsprogs: catch us up to 6.8, at least Darrick J. Wong
2024-04-17 21:15 ` [PATCHSET 01/11] xfsprogs: packaging fixes for 6.7 Darrick J. Wong
2024-04-17 21:18   ` [PATCH 1/2] debian: fix package configuration after removing platform_defs.h.in Darrick J. Wong
2024-05-01 18:00     ` Bill O'Donnell
2024-05-01 18:06       ` Bill O'Donnell
2024-04-17 21:18   ` [PATCH 2/2] libxfs: fix incorrect porting to 6.7 Darrick J. Wong
2024-04-17 21:15 ` [PATCHSET 02/11] xfsprogs: minor fixes for 6.7 Darrick J. Wong
2024-04-17 21:18   ` [PATCH 1/1] mkfs: fix log sunit rounding when external logs are in use Darrick J. Wong
2024-04-17 21:16 ` [PATCHSET 03/11] xfsprogs: convert utilities to use new rt helpers Darrick J. Wong
2024-04-17 21:19   ` [PATCH 01/11] xfs_repair: fix confusing rt space units in the duplicate detection code Darrick J. Wong
2024-04-17 21:19   ` [PATCH 02/11] libxfs: create a helper to compute leftovers of realtime extents Darrick J. Wong
2024-04-17 21:19   ` [PATCH 03/11] libxfs: use helpers to convert rt block numbers to rt extent numbers Darrick J. Wong
2024-04-17 21:19   ` [PATCH 04/11] xfs_repair: convert utility to use new rt extent helpers and types Darrick J. Wong
2024-04-17 21:20   ` [PATCH 05/11] mkfs: " Darrick J. Wong
2024-04-17 21:20   ` [PATCH 06/11] xfs_{db,repair}: convert open-coded xfs_rtword_t pointer accesses to helper Darrick J. Wong
2024-04-17 21:20   ` [PATCH 07/11] xfs_repair: convert helpers for rtbitmap block/wordcount computations Darrick J. Wong
2024-04-17 21:20   ` [PATCH 08/11] xfs_{db,repair}: use accessor functions for bitmap words Darrick J. Wong
2024-04-17 21:21   ` [PATCH 09/11] xfs_{db,repair}: use helpers for rtsummary block/wordcount computations Darrick J. Wong
2024-04-17 21:21   ` [PATCH 10/11] xfs_{db,repair}: use accessor functions for summary info words Darrick J. Wong
2024-04-17 21:21   ` [PATCH 11/11] xfs_{db,repair}: use m_blockwsize instead of sb_blocksize for rt blocks Darrick J. Wong
2024-04-17 21:16 ` [PATCHSET 04/11] libxfs: sync with 6.8 Darrick J. Wong
2024-04-17 21:22   ` [PATCH 01/67] xfs: use xfs_defer_pending objects to recover intent items Darrick J. Wong
2024-04-17 21:22   ` [PATCH 02/67] xfs: recreate work items when recovering " Darrick J. Wong
2024-04-17 21:22   ` [PATCH 03/67] xfs: use xfs_defer_finish_one to finish recovered work items Darrick J. Wong
2024-04-17 21:22   ` [PATCH 04/67] xfs: move ->iop_recover to xfs_defer_op_type Darrick J. Wong
2024-04-17 21:23   ` [PATCH 05/67] xfs: hoist intent done flag setting to ->finish_item callsite Darrick J. Wong
2024-04-17 21:23   ` [PATCH 06/67] xfs: hoist ->create_intent boilerplate to its callsite Darrick J. Wong
2024-04-17 21:23   ` [PATCH 07/67] xfs: use xfs_defer_create_done for the relogging operation Darrick J. Wong
2024-04-17 21:23   ` [PATCH 08/67] xfs: clean out XFS_LI_DIRTY setting boilerplate from ->iop_relog Darrick J. Wong
2024-04-17 21:24   ` [PATCH 09/67] xfs: hoist xfs_trans_add_item calls to defer ops functions Darrick J. Wong
2024-04-17 21:24   ` [PATCH 10/67] xfs: move ->iop_relog to struct xfs_defer_op_type Darrick J. Wong
2024-04-17 21:24   ` [PATCH 11/67] xfs: make rextslog computation consistent with mkfs Darrick J. Wong
2024-04-17 21:24   ` [PATCH 12/67] xfs: fix 32-bit truncation in xfs_compute_rextslog Darrick J. Wong
2024-04-17 21:25   ` [PATCH 13/67] xfs: don't allow overly small or large realtime volumes Darrick J. Wong
2024-04-17 21:25   ` [PATCH 14/67] xfs: elide ->create_done calls for unlogged deferred work Darrick J. Wong
2024-04-17 21:25   ` [PATCH 15/67] xfs: don't append work items to logged xfs_defer_pending objects Darrick J. Wong
2024-04-17 21:25   ` [PATCH 16/67] xfs: allow pausing of pending deferred work items Darrick J. Wong
2024-04-17 21:26   ` [PATCH 17/67] xfs: remove __xfs_free_extent_later Darrick J. Wong
2024-04-17 21:26   ` [PATCH 18/67] xfs: automatic freeing of freshly allocated unwritten space Darrick J. Wong
2024-04-17 21:26   ` [PATCH 19/67] xfs: remove unused fields from struct xbtree_ifakeroot Darrick J. Wong
2024-04-17 21:26   ` [PATCH 20/67] xfs: force small EFIs for reaping btree extents Darrick J. Wong
2024-04-17 21:27   ` [PATCH 21/67] xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real Darrick J. Wong
2024-04-17 21:27   ` [PATCH 22/67] xfs: update dir3 leaf block metadata after swap Darrick J. Wong
2024-04-17 21:27   ` [PATCH 23/67] xfs: extract xfs_da_buf_copy() helper function Darrick J. Wong
2024-04-17 21:28   ` [PATCH 24/67] xfs: move xfs_ondisk.h to libxfs/ Darrick J. Wong
2024-04-17 21:28   ` [PATCH 25/67] xfs: consolidate the xfs_attr_defer_* helpers Darrick J. Wong
2024-04-17 21:28   ` [PATCH 26/67] xfs: store an ops pointer in struct xfs_defer_pending Darrick J. Wong
2024-04-17 21:28   ` [PATCH 27/67] xfs: pass the defer ops instead of type to xfs_defer_start_recovery Darrick J. Wong
2024-04-17 21:29   ` [PATCH 28/67] xfs: pass the defer ops directly to xfs_defer_add Darrick J. Wong
2024-04-17 21:29   ` [PATCH 29/67] xfs: force all buffers to be written during btree bulk load Darrick J. Wong
2024-04-17 21:29   ` [PATCH 30/67] xfs: set XBF_DONE on newly formatted btree block that are ready for writing Darrick J. Wong
2024-04-17 21:29   ` [PATCH 31/67] xfs: read leaf blocks when computing keys for bulkloading into node blocks Darrick J. Wong
2024-04-17 21:30   ` [PATCH 32/67] xfs: move btree bulkload record initialization to ->get_record implementations Darrick J. Wong
2024-04-17 21:30   ` [PATCH 33/67] xfs: constrain dirty buffers while formatting a staged btree Darrick J. Wong
2024-04-17 21:30   ` [PATCH 34/67] xfs: repair free space btrees Darrick J. Wong
2024-04-17 21:30   ` [PATCH 35/67] xfs: repair inode btrees Darrick J. Wong
2024-04-17 21:31   ` [PATCH 36/67] xfs: repair refcount btrees Darrick J. Wong
2024-04-17 21:31   ` [PATCH 37/67] xfs: dont cast to char * for XFS_DFORK_*PTR macros Darrick J. Wong
2024-04-17 21:31   ` [PATCH 38/67] xfs: set inode sick state flags when we zap either ondisk fork Darrick J. Wong
2024-04-17 21:31   ` [PATCH 39/67] xfs: zap broken inode forks Darrick J. Wong
2024-04-17 21:32   ` [PATCH 40/67] xfs: repair inode fork block mapping data structures Darrick J. Wong
2024-04-17 21:32   ` [PATCH 41/67] xfs: create a ranged query function for refcount btrees Darrick J. Wong
2024-04-17 21:32   ` [PATCH 42/67] xfs: create a new inode fork block unmap helper Darrick J. Wong
2024-04-17 21:32   ` [PATCH 43/67] xfs: improve dquot iteration for scrub Darrick J. Wong
2024-04-17 21:33   ` [PATCH 44/67] xfs: add lock protection when remove perag from radix tree Darrick J. Wong
2024-04-17 21:33   ` [PATCH 45/67] xfs: fix perag leak when growfs fails Darrick J. Wong
2024-04-17 21:33   ` [PATCH 46/67] xfs: remove the xfs_alloc_arg argument to xfs_bmap_btalloc_accounting Darrick J. Wong
2024-04-17 21:34   ` [PATCH 47/67] xfs: also use xfs_bmap_btalloc_accounting for RT allocations Darrick J. Wong
2024-04-17 21:34   ` [PATCH 48/67] xfs: return -ENOSPC from xfs_rtallocate_* Darrick J. Wong
2024-04-17 21:34   ` [PATCH 49/67] xfs: indicate if xfs_bmap_adjacent changed ap->blkno Darrick J. Wong
2024-04-17 21:34   ` [PATCH 50/67] xfs: move xfs_rtget_summary to xfs_rtbitmap.c Darrick J. Wong
2024-04-17 21:35   ` [PATCH 51/67] xfs: split xfs_rtmodify_summary_int Darrick J. Wong
2024-04-17 21:35   ` [PATCH 52/67] xfs: remove rt-wrappers from xfs_format.h Darrick J. Wong
2024-04-17 21:35   ` [PATCH 53/67] xfs: remove XFS_RTMIN/XFS_RTMAX Darrick J. Wong
2024-04-17 21:35   ` [PATCH 54/67] xfs: make if_data a void pointer Darrick J. Wong
2024-04-17 21:36   ` [PATCH 55/67] xfs: return if_data from xfs_idata_realloc Darrick J. Wong
2024-04-17 21:36   ` [PATCH 56/67] xfs: move the xfs_attr_sf_lookup tracepoint Darrick J. Wong
2024-04-17 21:36   ` [PATCH 57/67] xfs: simplify xfs_attr_sf_findname Darrick J. Wong
2024-04-17 21:36   ` [PATCH 58/67] xfs: remove xfs_attr_shortform_lookup Darrick J. Wong
2024-04-17 21:37   ` [PATCH 59/67] xfs: use xfs_attr_sf_findname in xfs_attr_shortform_getvalue Darrick J. Wong
2024-04-17 21:37   ` [PATCH 60/67] xfs: remove struct xfs_attr_shortform Darrick J. Wong
2024-04-17 21:37   ` [PATCH 61/67] xfs: remove xfs_attr_sf_hdr_t Darrick J. Wong
2024-04-17 21:37   ` [PATCH 62/67] xfs: turn the XFS_DA_OP_REPLACE checks in xfs_attr_shortform_addname into asserts Darrick J. Wong
2024-04-17 21:38   ` [PATCH 63/67] xfs: fix a use after free in xfs_defer_finish_recovery Darrick J. Wong
2024-04-17 21:38   ` [PATCH 64/67] xfs: use the op name in trace_xlog_intent_recovery_failed Darrick J. Wong
2024-04-17 21:38   ` [PATCH 65/67] xfs: fix backwards logic in xfs_bmap_alloc_account Darrick J. Wong
2024-04-17 21:38   ` [PATCH 66/67] xfs: reset XFS_ATTR_INCOMPLETE filter on node removal Darrick J. Wong
2024-04-17 21:39   ` [PATCH 67/67] xfs: remove conditional building of rt geometry validator functions Darrick J. Wong
2024-04-17 21:16 ` [PATCHSET 05/11] xfs_repair: faster btree bulkloading Darrick J. Wong
2024-04-17 21:39   ` [PATCH 1/2] xfs_repair: adjust btree bulkloading slack computations to match online repair Darrick J. Wong
2024-04-17 21:39   ` [PATCH 2/2] xfs_repair: bulk load records into new btree blocks Darrick J. Wong
2024-04-17 21:16 ` [PATCHSET 06/11] xfsprogs: bug fixes for 6.8 Darrick J. Wong
2024-04-17 21:40   ` [PATCH 1/5] xfs_repair: double-check with shortform attr verifiers Darrick J. Wong
2024-04-17 21:40   ` [PATCH 2/5] xfs_db: improve number extraction in getbitval Darrick J. Wong
2024-04-17 21:40   ` [PATCH 3/5] xfs_scrub: fix threadcount estimates for phase 6 Darrick J. Wong
2024-04-17 21:40   ` [PATCH 4/5] xfs_scrub: don't fail while reporting media scan errors Darrick J. Wong
2024-04-17 21:41   ` [PATCH 5/5] xfs_io: add linux madvise advice codes Darrick J. Wong
2024-04-17 21:17 ` [PATCHSET V3 07/11] xfsprogs: fix log sector size detection Darrick J. Wong
2024-04-17 21:41   ` [PATCH 1/5] libxfs: remove the unused fs_topology_t typedef Darrick J. Wong
2024-04-17 21:41   ` [PATCH 2/5] libxfs: refactor the fs_topology structure Darrick J. Wong
2024-04-17 21:41   ` [PATCH 3/5] libxfs: remove the S_ISREG check from blkid_get_topology Darrick J. Wong
2024-04-17 21:42   ` [PATCH 4/5] libxfs: also query log device topology in get_topology Darrick J. Wong
2024-04-17 21:42   ` [PATCH 5/5] mkfs: use a sensible log sector size default Darrick J. Wong
2024-04-17 21:17 ` [PATCHSET 08/11] mkfs: scale shards on ssds Darrick J. Wong
2024-04-17 21:42   ` [PATCH 1/2] mkfs: allow sizing allocation groups for concurrency Darrick J. Wong
2024-04-17 21:42   ` [PATCH 2/2] mkfs: allow sizing internal logs " Darrick J. Wong
2024-04-17 21:17 ` [PATCHSET v30.3 09/11] xfs_scrub: scan metadata files in parallel Darrick J. Wong
2024-04-17 21:43   ` [PATCH 1/3] libfrog: rename XFROG_SCRUB_TYPE_* to XFROG_SCRUB_GROUP_* Darrick J. Wong
2024-04-17 21:43   ` [PATCH 2/3] libfrog: promote XFROG_SCRUB_DESCR_SUMMARY to a scrub type Darrick J. Wong
2024-04-17 21:43   ` [PATCH 3/3] xfs_scrub: scan whole-fs metadata files in parallel Darrick J. Wong
2024-04-17 21:17 ` [PATCHSET v30.3 10/11] xfs_repair: rebuild inode fork mappings Darrick J. Wong
2024-04-17 21:43   ` [PATCH 1/3] xfs_repair: push inode buf and dinode pointers all the way to inode fork processing Darrick J. Wong
2024-04-17 21:44   ` [PATCH 2/3] xfs_repair: sync bulkload data structures with kernel newbt code Darrick J. Wong
2024-04-17 21:44   ` Darrick J. Wong [this message]
2024-04-17 21:18 ` [PATCHSET 11/11] xfs_repair: support more than 4 billion records Darrick J. Wong
2024-04-17 21:44   ` [PATCH 1/8] xfs_db: add a bmbt inflation command Darrick J. Wong
2024-04-17 21:45   ` [PATCH 2/8] xfs_repair: slab and bag structs need to track more than 2^32 items Darrick J. Wong
2024-04-17 21:45   ` [PATCH 3/8] xfs_repair: support more than 2^32 rmapbt records per AG Darrick J. Wong
2024-04-17 21:45   ` [PATCH 4/8] xfs_repair: support more than 2^32 owners per physical block Darrick J. Wong
2024-04-17 21:45   ` [PATCH 5/8] xfs_repair: clean up lock resources Darrick J. Wong
2024-04-17 21:46   ` [PATCH 6/8] xfs_repair: constrain attr fork extent count Darrick J. Wong
2024-04-17 21:46   ` [PATCH 7/8] xfs_repair: don't create block maps for data files Darrick J. Wong
2024-04-17 21:46   ` [PATCH 8/8] xfs_repair: support more than INT_MAX block maps Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2024-03-26  2:56 [PATCHSET v29.4 08/18] xfs_repair: rebuild inode fork mappings Darrick J. Wong
2024-03-26  3:25 ` [PATCH 3/3] xfs_repair: rebuild block mappings from rmapbt data Darrick J. Wong
2024-03-13  1:49 [PATCHSET v29.4 08/10] xfs_repair: rebuild inode fork mappings Darrick J. Wong
2024-03-13  2:14 ` [PATCH 3/3] xfs_repair: rebuild block mappings from rmapbt data Darrick J. Wong
2024-03-14  1:59   ` Christoph Hellwig
2024-03-14 23:56     ` Darrick J. Wong
2024-03-17 21:10       ` Christoph Hellwig
2024-03-14 17:13   ` Bill O'Donnell
2023-12-31 19:41 [PATCHSET v29.0 06/40] xfs_repair: rebuild inode fork mappings Darrick J. Wong
2023-12-31 22:08 ` [PATCH 3/3] xfs_repair: rebuild block mappings from rmapbt data Darrick J. Wong

find likely ancestor, descendant, or conflicting patches for this message:
dfblob:ab298ccf dfblob:1b97881b dfblob:28960317 dfblob:bd1186b2
dfblob:2c40e59a dfblob:c6f0512f dfblob:18158c39 dfblob:f4790e3b
dfblob:a18af3ff dfblob:6bb77e08 dfblob:6004e9f6 dfblob:ac82c3bc
dfblob:5df95e69 dfblob:769733ec dfblob:8143a6a9 dfblob:e5014deb
dfblob:38f3f7b8 dfblob:1dbcafb2 dfblob:6d553594 dfblob:a97839f5
dfblob:a88aafaa dfblob:b8f5bf4e dfblob:a2291c7b dfblob:1dad2f58
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=171338845464.1856515.6344335951685084221.stgit@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=bodonnel@redhat.com \
    --cc=cem@kernel.org \
    --cc=hch@lst.de \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.