All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: djwong@kernel.org
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 3/6] xfs: log EFIs for all btree blocks being used to stage a btree
Date: Thu, 27 Jul 2023 15:24:32 -0700	[thread overview]
Message-ID: <169049623218.921279.10028914723578681696.stgit@frogsfrogsfrogs> (raw)
In-Reply-To: <169049623167.921279.16448199708156630380.stgit@frogsfrogsfrogs>

From: Darrick J. Wong <djwong@kernel.org>

We need to log EFIs for every extent that we allocate for the purpose of
staging a new btree so that if we fail then the blocks will be freed
during log recovery.  Add a function to relog the EFIs, so that repair
can relog them all every time it creates a new btree block, which will
help us to avoid pinning the log tail.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/scrub/newbt.c  |  147 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/newbt.h  |    4 +
 fs/xfs/scrub/repair.c |   10 +++
 fs/xfs/scrub/repair.h |    1 
 4 files changed, 162 insertions(+)


diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c
index 2eceac52f2834..cbfe2ffada635 100644
--- a/fs/xfs/scrub/newbt.c
+++ b/fs/xfs/scrub/newbt.c
@@ -13,12 +13,14 @@
 #include "xfs_btree_staging.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
+#include "xfs_log.h"
 #include "xfs_sb.h"
 #include "xfs_inode.h"
 #include "xfs_alloc.h"
 #include "xfs_rmap.h"
 #include "xfs_ag.h"
 #include "xfs_defer.h"
+#include "xfs_extfree_item.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
@@ -124,6 +126,139 @@ xrep_newbt_init_bare(
 			XFS_AG_RESV_NONE);
 }
 
+/*
+ * Set up automatic reaping of the blocks reserved for btree reconstruction in
+ * case we crash by logging a deferred free item for each extent we allocate so
+ * that we can get all of the space back if we crash before we can commit the
+ * new btree.  This function returns a token that can be used to cancel
+ * automatic reaping if repair is successful.
+ */
+static int
+xrep_newbt_schedule_autoreap(
+	struct xrep_newbt		*xnr,
+	struct xrep_newbt_resv		*resv)
+{
+	struct xfs_extent_free_item	efi_item = {
+		.xefi_blockcount	= resv->len,
+		.xefi_owner		= xnr->oinfo.oi_owner,
+		.xefi_flags		= XFS_EFI_SKIP_DISCARD,
+		.xefi_pag		= resv->pag,
+	};
+	struct xfs_scrub		*sc = xnr->sc;
+	struct xfs_log_item		*lip;
+	LIST_HEAD(items);
+
+	ASSERT(xnr->oinfo.oi_offset == 0);
+
+	efi_item.xefi_startblock = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno,
+			resv->agbno);
+	if (xnr->oinfo.oi_flags & XFS_OWNER_INFO_ATTR_FORK)
+		efi_item.xefi_flags |= XFS_EFI_ATTR_FORK;
+	if (xnr->oinfo.oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
+		efi_item.xefi_flags |= XFS_EFI_BMBT_BLOCK;
+
+	INIT_LIST_HEAD(&efi_item.xefi_list);
+	list_add(&efi_item.xefi_list, &items);
+
+	xfs_perag_intent_hold(resv->pag);
+	lip = xfs_extent_free_defer_type.create_intent(sc->tp, &items, 1,
+			false);
+	ASSERT(lip != NULL && !IS_ERR(lip));
+
+	resv->efi = lip;
+	return 0;
+}
+
+/*
+ * Earlier, we logged EFIs for the extents that we allocated to hold the new
+ * btree so that we could automatically roll back those allocations if the
+ * system crashed.  Now we log an EFD to cancel the EFI, either because the
+ * repair succeeded and the new blocks are in use; or because the repair was
+ * cancelled and we're about to free the extents directly.
+ */
+static inline void
+xrep_newbt_finish_autoreap(
+	struct xfs_scrub	*sc,
+	struct xrep_newbt_resv	*resv)
+{
+	struct xfs_efd_log_item	*efdp;
+	struct xfs_extent	*extp;
+	struct xfs_log_item	*efd_lip;
+
+	efd_lip = xfs_extent_free_defer_type.create_done(sc->tp, resv->efi, 1);
+	efdp = container_of(efd_lip, struct xfs_efd_log_item, efd_item);
+	extp = efdp->efd_format.efd_extents;
+	extp->ext_start = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno,
+					 resv->agbno);
+	extp->ext_len = resv->len;
+	efdp->efd_next_extent++;
+	set_bit(XFS_LI_DIRTY, &efd_lip->li_flags);
+	xfs_perag_intent_rele(resv->pag);
+}
+
+/* Abort an EFI logged for a new btree block reservation. */
+static inline void
+xrep_newbt_cancel_autoreap(
+	struct xrep_newbt_resv	*resv)
+{
+	xfs_extent_free_defer_type.abort_intent(resv->efi);
+	xfs_perag_intent_rele(resv->pag);
+}
+
+/*
+ * Relog the EFIs attached to a staging btree so that we don't pin the log
+ * tail.  Same logic as xfs_defer_relog.
+ */
+int
+xrep_newbt_relog_autoreap(
+	struct xrep_newbt	*xnr)
+{
+	struct xrep_newbt_resv	*resv;
+	unsigned int		efi_bytes = 0;
+
+	list_for_each_entry(resv, &xnr->resv_list, list) {
+		/*
+		 * If the log intent item for this deferred op is in a
+		 * different checkpoint, relog it to keep the log tail moving
+		 * forward.  We're ok with this being racy because an incorrect
+		 * decision means we'll be a little slower at pushing the tail.
+		 */
+		if (!resv->efi || xfs_log_item_in_current_chkpt(resv->efi))
+			continue;
+
+		resv->efi = xfs_trans_item_relog(resv->efi, xnr->sc->tp);
+
+		/*
+		 * If free space is very fragmented, it's possible that the new
+		 * btree will be allocated a large number of small extents.
+		 * On an active system, it's possible that so many of those
+		 * EFIs will need relogging here that doing them all in one
+		 * transaction will overflow the reservation.
+		 *
+		 * Each allocation for the new btree (xrep_newbt_resv) points
+		 * to a unique single-mapping EFI, so each relog operation logs
+		 * a single-mapping EFD followed by a new EFI.  Each single
+		 * mapping EF[ID] item consumes about 128 bytes, so we'll
+		 * assume 256 bytes per relog.  Roll if we consume more than
+		 * half of the transaction reservation.
+		 */
+		efi_bytes += 256;
+		if (efi_bytes > xnr->sc->tp->t_log_res / 2) {
+			int	error;
+
+			error = xrep_roll_trans(xnr->sc);
+			if (error)
+				return error;
+
+			efi_bytes = 0;
+		}
+	}
+
+	if (xnr->sc->tp->t_flags & XFS_TRANS_DIRTY)
+		return xrep_roll_trans(xnr->sc);
+	return 0;
+}
+
 /*
  * Designate specific blocks to be used to build our new btree.  @pag must be
  * a passive reference.
@@ -136,6 +271,7 @@ xrep_newbt_add_blocks(
 	xfs_extlen_t			len)
 {
 	struct xrep_newbt_resv		*resv;
+	int				error;
 
 	resv = kmalloc(sizeof(struct xrep_newbt_resv), XCHK_GFP_FLAGS);
 	if (!resv)
@@ -147,8 +283,16 @@ xrep_newbt_add_blocks(
 	resv->used = 0;
 	resv->pag = xfs_perag_hold(pag);
 
+	error = xrep_newbt_schedule_autoreap(xnr, resv);
+	if (error)
+		goto out_pag;
+
 	list_add_tail(&resv->list, &xnr->resv_list);
 	return 0;
+out_pag:
+	xfs_perag_put(resv->pag);
+	kfree(resv);
+	return error;
 }
 
 /* Don't let our allocation hint take us beyond this AG */
@@ -326,6 +470,8 @@ xrep_newbt_free_extent(
 		free_aglen -= resv->used;
 	}
 
+	xrep_newbt_finish_autoreap(sc, resv);
+
 	if (free_aglen == 0)
 		return 0;
 
@@ -396,6 +542,7 @@ xrep_newbt_free(
 	 * reservations.
 	 */
 	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
+		xrep_newbt_cancel_autoreap(resv);
 		list_del(&resv->list);
 		xfs_perag_put(resv->pag);
 		kfree(resv);
diff --git a/fs/xfs/scrub/newbt.h b/fs/xfs/scrub/newbt.h
index ca53271f3a4c6..cf822472f1667 100644
--- a/fs/xfs/scrub/newbt.h
+++ b/fs/xfs/scrub/newbt.h
@@ -12,6 +12,9 @@ struct xrep_newbt_resv {
 
 	struct xfs_perag	*pag;
 
+	/* EFI tracking this space reservation */
+	struct xfs_log_item	*efi;
+
 	/* AG block of the extent we reserved. */
 	xfs_agblock_t		agbno;
 
@@ -58,5 +61,6 @@ void xrep_newbt_cancel(struct xrep_newbt *xnr);
 int xrep_newbt_commit(struct xrep_newbt *xnr);
 int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
 		union xfs_btree_ptr *ptr);
+int xrep_newbt_relog_autoreap(struct xrep_newbt *xnr);
 
 #endif /* __XFS_SCRUB_NEWBT_H__ */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 83a1b1437a4fa..c2474cc40d04c 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -167,6 +167,16 @@ xrep_roll_ag_trans(
 	return 0;
 }
 
+/* Roll the scrub transaction, holding the primary metadata locked. */
+int
+xrep_roll_trans(
+	struct xfs_scrub	*sc)
+{
+	if (!sc->ip)
+		return xrep_roll_ag_trans(sc);
+	return xfs_trans_roll_inode(&sc->tp, sc->ip);
+}
+
 /* Finish all deferred work attached to the repair transaction. */
 int
 xrep_defer_finish(
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index dc89164d10a63..9ea1eb0aae49d 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -20,6 +20,7 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
 int xrep_attempt(struct xfs_scrub *sc);
 void xrep_failure(struct xfs_mount *mp);
 int xrep_roll_ag_trans(struct xfs_scrub *sc);
+int xrep_roll_trans(struct xfs_scrub *sc);
 int xrep_defer_finish(struct xfs_scrub *sc);
 bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
 		enum xfs_ag_resv_type type);


  parent reply	other threads:[~2023-07-27 22:24 UTC|newest]

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-27 22:11 [MEGAPATCHSET v26] xfs: online repair, part of part 1 Darrick J. Wong
2023-07-27 22:18 ` [PATCHSET v26.0 0/9] xfs: fix online repair block reaping Darrick J. Wong
2023-07-27 22:21   ` [PATCH 1/9] xfs: cull repair code that will never get used Darrick J. Wong
2023-07-27 22:21   ` [PATCH 2/9] xfs: move the post-repair block reaping code to a separate file Darrick J. Wong
2023-07-27 22:22   ` [PATCH 3/9] xfs: only invalidate blocks if we're going to free them Darrick J. Wong
2023-07-27 22:22   ` [PATCH 4/9] xfs: only allow reaping of per-AG blocks in xrep_reap_extents Darrick J. Wong
2023-07-27 22:22   ` [PATCH 5/9] xfs: use deferred frees to reap old btree blocks Darrick J. Wong
2023-07-27 22:22   ` [PATCH 6/9] xfs: rearrange xrep_reap_block to make future code flow easier Darrick J. Wong
2023-07-27 22:23   ` [PATCH 7/9] xfs: allow scanning ranges of the buffer cache for live buffers Darrick J. Wong
2023-07-27 22:23   ` [PATCH 8/9] xfs: reap large AG metadata extents when possible Darrick J. Wong
2023-07-27 22:23   ` [PATCH 9/9] xfs: use per-AG bitmaps to reap unused AG metadata blocks during repair Darrick J. Wong
2023-08-07  6:19   ` [PATCHSET v26.0 0/9] xfs: fix online repair block reaping Dave Chinner
2023-08-08  0:40     ` Darrick J. Wong
2023-08-08  5:17       ` Dave Chinner
2023-08-09 23:17         ` Darrick J. Wong
2023-07-27 22:18 ` [PATCHSET v26.0 0/6] xfs: prepare repair for bulk loading Darrick J. Wong
2023-07-27 22:24   ` [PATCH 1/6] xfs: force all buffers to be written during btree bulk load Darrick J. Wong
2023-07-27 22:24   ` [PATCH 2/6] xfs: implement block reservation accounting for btrees we're staging Darrick J. Wong
2023-08-07  6:58     ` Dave Chinner
2023-08-08  1:08       ` Darrick J. Wong
2023-07-27 22:24   ` Darrick J. Wong [this message]
2023-08-07  8:41     ` [PATCH 3/6] xfs: log EFIs for all btree blocks being used to stage a btree Dave Chinner
2023-08-08  0:54       ` Darrick J. Wong
2023-08-08  6:11         ` Dave Chinner
2023-08-09 23:52           ` Darrick J. Wong
2023-08-10 20:36             ` Darrick J. Wong
2023-09-08 23:34       ` Darrick J. Wong
2023-07-27 22:24   ` [PATCH 4/6] xfs: add debug knobs to control btree bulk load slack factors Darrick J. Wong
2023-07-27 22:25   ` [PATCH 5/6] xfs: move btree bulkload record initialization to ->get_record implementations Darrick J. Wong
2023-07-27 22:25   ` [PATCH 6/6] xfs: constrain dirty buffers while formatting a staged btree Darrick J. Wong
2023-07-27 22:19 ` [PATCHSET v26.0 0/7] xfs: stage repair information in pageable memory Darrick J. Wong
2023-07-27 22:25   ` [PATCH 1/7] xfs: create a big array data structure Darrick J. Wong
2023-07-28  3:10     ` Matthew Wilcox
2023-07-28  4:39       ` Darrick J. Wong
2023-07-27 22:25   ` [PATCH 2/7] xfs: enable sorting of xfile-backed arrays Darrick J. Wong
2023-07-27 22:26   ` [PATCH 3/7] xfs: convert xfarray insertion sort to heapsort using scratchpad memory Darrick J. Wong
2023-07-27 22:26   ` [PATCH 4/7] xfs: teach xfile to pass back direct-map pages to caller Darrick J. Wong
2023-07-27 22:26   ` [PATCH 5/7] xfs: speed up xfarray sort by sorting xfile page contents directly Darrick J. Wong
2023-07-27 22:26   ` [PATCH 6/7] xfs: cache pages used for xfarray quicksort convergence Darrick J. Wong
2023-07-27 22:27   ` [PATCH 7/7] xfs: improve xfarray quicksort pivot Darrick J. Wong
2023-07-27 22:19 ` [PATCHSET v26.0 0/2] xfs: add usage counters for scrub Darrick J. Wong
2023-07-27 22:27   ` [PATCH 1/2] xfs: create scaffolding for creating debugfs entries Darrick J. Wong
2023-07-27 22:27   ` [PATCH 2/2] xfs: track usage statistics of online fsck Darrick J. Wong
2023-08-08  7:09   ` [PATCHSET v26.0 0/2] xfs: add usage counters for scrub Dave Chinner
2023-07-27 22:19 ` [PATCHSET v26.0 0/4] xfs: online scrubbing of realtime summary files Darrick J. Wong
2023-07-27 22:27   ` [PATCH 1/4] xfs: get our own reference to inodes that we want to scrub Darrick J. Wong
2023-07-27 22:28   ` [PATCH 2/4] xfs: wrap ilock/iunlock operations on sc->ip Darrick J. Wong
2023-07-27 22:28   ` [PATCH 3/4] xfs: move the realtime summary file scrubber to a separate source file Darrick J. Wong
2023-07-27 22:28   ` [PATCH 4/4] xfs: implement online scrubbing of rtsummary info Darrick J. Wong
2023-07-27 22:19 ` [PATCHSET v26.0 0/2] xfs: miscellaneous repair tweaks Darrick J. Wong
2023-07-27 22:28   ` [PATCH 1/2] xfs: always rescan allegedly healthy per-ag metadata after repair Darrick J. Wong
2023-07-27 22:29   ` [PATCH 2/2] xfs: allow the user to cancel repairs before we start writing Darrick J. Wong
2023-07-27 22:20 ` [PATCHSET v26.0 0/2] xfs: force rebuilding of metadata Darrick J. Wong
2023-07-27 22:29   ` [PATCH 1/2] xfs: don't complain about unfixed metadata when repairs were injected Darrick J. Wong
2023-07-27 22:29   ` [PATCH 2/2] xfs: allow userspace to rebuild metadata structures Darrick J. Wong
2023-07-27 22:20 ` [PATCHSET v26.0 0/2] xfs: fixes to the AGFL repair code Darrick J. Wong
2023-07-27 22:30   ` [PATCH 1/2] xfs: clear pagf_agflreset when repairing the AGFL Darrick J. Wong
2023-07-27 22:30   ` [PATCH 2/2] xfs: fix agf_fllast when repairing an empty AGFL Darrick J. Wong
2023-08-08  7:10     ` Dave Chinner
2023-07-27 22:20 ` [PATCHSET v26.0 0/5] xfs: online repair of AG btrees Darrick J. Wong
2023-07-27 22:30   ` [PATCH 1/5] xfs: repair free space btrees Darrick J. Wong
2023-07-27 22:30   ` [PATCH 2/5] xfs: hide xfs_inode_is_allocated in scrub common code Darrick J. Wong
2023-08-08  7:13     ` Dave Chinner
2023-07-27 22:31   ` [PATCH 3/5] xfs: rewrite xchk_inode_is_allocated to work properly Darrick J. Wong
2023-08-08  7:14     ` Dave Chinner
2023-07-27 22:31   ` [PATCH 4/5] xfs: repair inode btrees Darrick J. Wong
2023-07-27 22:31   ` [PATCH 5/5] xfs: repair refcount btrees Darrick J. Wong
2023-07-27 22:20 ` [PATCHSET v26.0 0/2] xfs: fixes for the block mapping checker Darrick J. Wong
2023-07-27 22:31   ` [PATCH 1/2] xfs: simplify returns in xchk_bmap Darrick J. Wong
2023-07-27 22:32   ` [PATCH 2/2] xfs: don't check reflink iflag state when checking cow fork Darrick J. Wong
2023-08-08  7:16   ` [PATCHSET v26.0 0/2] xfs: fixes for the block mapping checker Dave Chinner
2023-07-27 22:21 ` [PATCHSET v26.0 0/6] xfs: online repair of inodes and forks Darrick J. Wong
2023-07-27 22:32   ` [PATCH 1/6] xfs: disable online repair quota helpers when quota not enabled Darrick J. Wong
2023-07-27 22:32   ` [PATCH 2/6] xfs: try to attach dquots to files before repairing them Darrick J. Wong
2023-07-27 22:32   ` [PATCH 3/6] xfs: repair inode records Darrick J. Wong
2023-08-09  8:42     ` Dave Chinner
2023-08-10  0:43       ` Darrick J. Wong
2023-07-27 22:33   ` [PATCH 4/6] xfs: zap broken inode forks Darrick J. Wong
2023-07-27 22:33   ` [PATCH 5/6] xfs: abort directory parent scrub scans if we encounter a zapped directory Darrick J. Wong
2023-07-27 22:33   ` [PATCH 6/6] xfs: repair obviously broken inode modes Darrick J. Wong
2023-08-09  9:44   ` [PATCHSET v26.0 0/6] xfs: online repair of inodes and forks Dave Chinner
2023-08-10  0:45     ` Darrick J. Wong
2023-07-27 22:21 ` [PATCHSET v26.0 0/5] xfs: online repair of file fork mappings Darrick J. Wong
2023-07-27 22:33   ` [PATCH 1/5] xfs: reintroduce reaping of file metadata blocks to xrep_reap_extents Darrick J. Wong
2023-07-27 22:34   ` [PATCH 2/5] xfs: repair inode fork block mapping data structures Darrick J. Wong
2023-07-27 22:34   ` [PATCH 3/5] xfs: refactor repair forcing tests into a repair.c helper Darrick J. Wong
2023-07-27 22:34   ` [PATCH 4/5] xfs: create a ranged query function for refcount btrees Darrick J. Wong
2023-07-27 22:34   ` [PATCH 5/5] xfs: repair problems in CoW forks Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2023-05-26  0:28 [PATCHSET v25.0 0/6] xfs: prepare repair for bulk loading Darrick J. Wong
2023-05-26  0:46 ` [PATCH 3/6] xfs: log EFIs for all btree blocks being used to stage a btree Darrick J. Wong
2022-12-30 22:12 [PATCHSET v24.0 0/6] xfs: prepare repair for bulk loading Darrick J. Wong
2022-12-30 22:12 ` [PATCH 3/6] xfs: log EFIs for all btree blocks being used to stage a btree Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=169049623218.921279.10028914723578681696.stgit@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.