Git Mailing List Archive mirror
 help / color / mirror / Atom feed
From: Christian Couder <christian.couder@gmail.com>
To: git@vger.kernel.org
Cc: Junio C Hamano <gitster@pobox.com>,
	John Cai <johncai86@gmail.com>,
	Jonathan Tan <jonathantanmy@google.com>,
	Jonathan Nieder <jrnieder@gmail.com>,
	Taylor Blau <me@ttaylorr.com>, Derrick Stolee <stolee@gmail.com>,
	Christian Couder <chriscool@tuxfamily.org>
Subject: [PATCH v2 2/2] repack: add --filter=<filter-spec> option
Date: Tue, 25 Oct 2022 14:28:56 +0200	[thread overview]
Message-ID: <20221025122856.20204-3-christian.couder@gmail.com> (raw)
In-Reply-To: <20221025122856.20204-1-christian.couder@gmail.com>

From: Christian Couder <chriscool@tuxfamily.org>

After cloning with --filter=<filter-spec>, for example to avoid
getting unneeded large files on a user machine, it's possible
that some of these large files still get fetched for some reasons
(like checking out old branches) over time.

In this case the repo size could grow too much for no good reason
and `git repack --filter=<filter-spec>` would be useful to remove
the unneeded large files.

This command could be dangerous to use though, as it might remove
local objects that haven't been pushed which would lose data and
corrupt the repo. On a server, this command could also corrupt a
repo unless ALL the removed objects aren't already available in
another remote that clients can access.

Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Documentation/git-repack.txt |  8 ++++++++
 builtin/repack.c             | 22 +++++++++++++++-------
 t/t7700-repack.sh            | 15 +++++++++++++++
 3 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index 0bf13893d8..d28bb68cfc 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -137,6 +137,14 @@ depth is 4095.
 	a larger and slower repository; see the discussion in
 	`pack.packSizeLimit`.
 
+--filter=<filter-spec>::
+	Omits certain objects (usually blobs) from the resulting
+	packfile. WARNING: this could easily corrupt the current repo
+	and lose data if ANY of the omitted objects hasn't been already
+	pushed to a remote. Be very careful about objects that might
+	have been created locally! See linkgit:git-rev-list[1] for valid
+	`<filter-spec>` forms.
+
 -b::
 --write-bitmap-index::
 	Write a reachability bitmap index as part of the repack. This
diff --git a/builtin/repack.c b/builtin/repack.c
index a5bacc7797..d8b8a58f73 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -50,6 +50,7 @@ struct pack_objects_args {
 	const char *depth;
 	const char *threads;
 	const char *max_pack_size;
+	const char *filter;
 	int no_reuse_delta;
 	int no_reuse_object;
 	int quiet;
@@ -201,6 +202,8 @@ static void prepare_pack_objects(struct child_process *cmd,
 		strvec_pushf(&cmd->args, "--threads=%s", args->threads);
 	if (args->max_pack_size)
 		strvec_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
+	if (args->filter)
+		strvec_pushf(&cmd->args, "--filter=%s", args->filter);
 	if (args->no_reuse_delta)
 		strvec_pushf(&cmd->args, "--no-reuse-delta");
 	if (args->no_reuse_object)
@@ -268,6 +271,13 @@ static unsigned populate_pack_exts(char *name)
 	return ret;
 }
 
+static void write_promisor_file_1(char *p)
+{
+	char *promisor_name = mkpathdup("%s-%s.promisor", packtmp, p);
+	write_promisor_file(promisor_name, NULL, 0);
+	free(promisor_name);
+}
+
 static void repack_promisor_objects(const struct pack_objects_args *args,
 				    struct string_list *names)
 {
@@ -299,7 +309,6 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
 	out = xfdopen(cmd.out, "r");
 	while (strbuf_getline_lf(&line, out) != EOF) {
 		struct string_list_item *item;
-		char *promisor_name;
 
 		if (line.len != the_hash_algo->hexsz)
 			die(_("repack: Expecting full hex object ID lines only from pack-objects."));
@@ -316,13 +325,8 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
 		 * concatenate the contents of all .promisor files instead of
 		 * just creating a new empty file.
 		 */
-		promisor_name = mkpathdup("%s-%s.promisor", packtmp,
-					  line.buf);
-		write_promisor_file(promisor_name, NULL, 0);
-
+		write_promisor_file_1(line.buf);
 		item->util = (void *)(uintptr_t)populate_pack_exts(item->string);
-
-		free(promisor_name);
 	}
 	fclose(out);
 	if (finish_command(&cmd))
@@ -786,6 +790,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 				N_("limits the maximum number of threads")),
 		OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
 				N_("maximum size of each packfile")),
+		OPT_STRING(0, "filter", &po_args.filter, N_("args"),
+				N_("object filtering")),
 		OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
 				N_("repack objects in packs marked with .keep")),
 		OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
@@ -955,6 +961,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 		if (line.len != the_hash_algo->hexsz)
 			die(_("repack: Expecting full hex object ID lines only from pack-objects."));
 		string_list_append(&names, line.buf);
+		if (po_args.filter)
+			write_promisor_file_1(line.buf);
 	}
 	fclose(out);
 	ret = finish_command(&cmd);
diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh
index ca45c4cd2c..363eaf8fea 100755
--- a/t/t7700-repack.sh
+++ b/t/t7700-repack.sh
@@ -237,6 +237,21 @@ test_expect_success 'auto-bitmaps do not complain if unavailable' '
 	test_must_be_empty actual
 '
 
+test_expect_success 'repacking with a filter works' '
+	test_when_finished "rm -rf server client" &&
+	test_create_repo server &&
+	git -C server config uploadpack.allowFilter true &&
+	git -C server config uploadpack.allowAnySHA1InWant true &&
+	test_commit -C server 1 &&
+	git clone --bare --no-local server client &&
+	git -C client config remote.origin.promisor true &&
+	git -C client rev-list --objects --all --missing=print >objects &&
+	test $(grep "^?" objects | wc -l) = 0 &&
+	git -C client -c repack.writebitmaps=false repack -a -d --filter=blob:none &&
+	git -C client rev-list --objects --all --missing=print >objects &&
+	test $(grep "^?" objects | wc -l) = 1
+'
+
 objdir=.git/objects
 midx=$objdir/pack/multi-pack-index
 
-- 
2.38.1.145.g80cce38e46.dirty


  parent reply	other threads:[~2022-10-25 12:29 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-12 13:51 [PATCH 0/3] Implement filtering repacks Christian Couder
2022-10-12 13:51 ` [PATCH 1/3] pack-objects: allow --filter without --stdout Christian Couder
2022-10-12 13:51 ` [PATCH 2/3] repack: add --filter=<filter-spec> option Christian Couder
2022-10-12 13:51 ` [PATCH 3/3] repack: introduce --force to force filtering Christian Couder
2022-10-14 16:46 ` [PATCH 0/3] Implement filtering repacks Junio C Hamano
2022-10-20 11:23   ` Christian Couder
2022-10-28 19:49     ` Taylor Blau
2022-10-28 20:26       ` Junio C Hamano
2022-11-07  9:12         ` Christian Couder
2022-11-07  9:00       ` Christian Couder
2022-10-25 12:28 ` [PATCH v2 0/2] " Christian Couder
2022-10-25 12:28   ` [PATCH v2 1/2] pack-objects: allow --filter without --stdout Christian Couder
2022-10-25 12:28   ` Christian Couder [this message]
2022-10-28 19:54   ` [PATCH v2 0/2] Implement filtering repacks Taylor Blau
2022-11-07  9:29     ` Christian Couder
2022-11-22 17:51   ` [PATCH v3 " Christian Couder
2022-11-22 17:51     ` [PATCH v3 1/2] pack-objects: allow --filter without --stdout Christian Couder
2022-11-22 17:51     ` [PATCH v3 2/2] repack: add --filter=<filter-spec> option Christian Couder
2022-11-23  0:31     ` [PATCH v3 0/2] Implement filtering repacks Junio C Hamano
2022-12-21  3:53       ` Christian Couder
2022-11-23  0:35     ` Junio C Hamano
2022-12-21  4:04     ` [PATCH v4 0/3] " Christian Couder
2022-12-21  4:04       ` [PATCH v4 1/3] pack-objects: allow --filter without --stdout Christian Couder
2023-01-04 14:56         ` Patrick Steinhardt
2022-12-21  4:04       ` [PATCH v4 2/3] repack: add --filter=<filter-spec> option Christian Couder
2023-01-04 14:56         ` Patrick Steinhardt
2023-01-05  1:39           ` Junio C Hamano
2022-12-21  4:04       ` [PATCH v4 3/3] gc: add gc.repackFilter config option Christian Couder
2023-01-04 14:57         ` Patrick Steinhardt
2024-05-15 13:25 ` [PATCH v2 0/3] upload-pack: support a missing-action Christian Couder
2024-05-15 13:25   ` [PATCH v2 1/3] rev-list: refactor --missing=<missing-action> Christian Couder
2024-05-15 16:16     ` Junio C Hamano
2024-05-15 13:25   ` [PATCH v2 2/3] pack-objects: use the missing action API Christian Couder
2024-05-15 16:46     ` Junio C Hamano
2024-05-15 13:25   ` [PATCH v2 3/3] upload-pack: allow configuring a missing-action Christian Couder
2024-05-15 17:08     ` Junio C Hamano
2024-05-15 13:59   ` [PATCH v2 0/3] upload-pack: support " Christian Couder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221025122856.20204-3-christian.couder@gmail.com \
    --to=christian.couder@gmail.com \
    --cc=chriscool@tuxfamily.org \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=johncai86@gmail.com \
    --cc=jonathantanmy@google.com \
    --cc=jrnieder@gmail.com \
    --cc=me@ttaylorr.com \
    --cc=stolee@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).