Git Mailing List Archive mirror
 help / color / mirror / Atom feed
From: Glen Choo <chooglen@google.com>
To: Heather Lapointe via GitGitGadget <gitgitgadget@gmail.com>,
	git@vger.kernel.org
Cc: "René Scharfe" <l.s.r@web.de>,
	"Heather Lapointe" <alpha@alphaservcomputing.solutions>
Subject: Re: [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command
Date: Wed, 26 Oct 2022 16:34:21 -0700	[thread overview]
Message-ID: <kl6lh6zqqhvm.fsf@chooglen-macbookpro.roam.corp.google.com> (raw)
In-Reply-To: <4672e3d958625cd76eb8056ab434e9a37f52661e.1665973401.git.gitgitgadget@gmail.com>

"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:

> index 34549d849f1..f81ef741487 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid,
>  	oidcpy(&d->oid, oid);
>  }
>  
> +static void queue_submodule(
> +		struct repository *superproject,
> +		const struct object_id *oid,
> +		struct strbuf *base, const char *filename,
> +		unsigned mode, struct archiver_context *c)
> +{
> +	struct repository subrepo;
> +
> +	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
> +		return;
> +
> +	if (repo_read_index(&subrepo) < 0)
> +		die("index file corrupt");
> +
> +    queue_directory(oid, base, filename, mode, c);
> +
> +	repo_clear(&subrepo);
> +}
> +

This bit is puzzling to me because we init the submodule, read its
index, and then don't read objects from it at all. How does this work
when we aren't reading objects from the submodule we init here? My guess
is that read_tree() is already doing the heavy lifting of recursing into
submodules, so we don't need to worry any more about init-ing submodules
in archive.c, which is great.

So in effect, this is just checking whether we can read the submodule
and its index. We can drop this check since we already do that check in
read_tree().

What's much more surprising is that you can delete the entire function
body (even queue_directory()!) and the tests still pass! The tests are
definitely testing what they say they are (I've also checked the
tarballs), so I'm not sure what's going on.

I commented out queue_directory() in the S_ISDIR case, and the only test
failures I saw were:

- t5000.68, which uses a glob in its pathspec. I tried using a glob in
  the archive submodule tests, but I couldn't reproduce the failure.
- t5004.11, which is a really big test case that I didn't bother looking
  deeply into.

So I'm at a loss as to what queue_directory() actually does. My best
guess at a reproduction would be to make a subdirectory in t5000.68 a
submodule. If we do find such a reproducing case, we should add it to
the test suite.

>  static int write_directory(
>  		struct repository *repo,
>  		struct archiver_context *c)
> @@ -228,9 +248,11 @@ static int write_directory(
>  		write_directory(repo, c) ||
>  		write_archive_entry(repo, &d->oid, d->path, d->baselen,
>  				    d->path + d->baselen, d->mode,
> -				    c) != READ_TREE_RECURSIVE;
> +				    c);
>  	free(d);
> -	return ret ? -1 : 0;
> +	if (ret == READ_TREE_RECURSIVE)
> +		return 0;
> +	return ret;
>  }
>  
>  static int queue_or_write_archive_entry(
> @@ -263,6 +285,11 @@ static int queue_or_write_archive_entry(
>  			return 0;
>  		queue_directory(oid, base, filename, mode, c);
>  		return READ_TREE_RECURSIVE;
> +	} else if (c->args->recurse_submodules && S_ISGITLINK(mode)) {
> +		if (is_submodule_active(r, filename)) {
> +			queue_submodule(r, oid, base, filename, mode, c);
> +			return READ_TREE_RECURSIVE;
> +		}

If we are omitting inactive submodules from the archive, we should test
this behavior.

>  	}
>  
>  	if (write_directory(r, c))
> @@ -446,6 +473,7 @@ static void parse_pathspec_arg(
>  		       PATHSPEC_PREFER_FULL,
>  		       "", pathspec);
>  	ar_args->pathspec.recursive = 1;
> +	ar_args->pathspec.recurse_submodules = ar_args->recurse_submodules;
>  	if (pathspec) {
>  		while (*pathspec) {
>  			if (**pathspec && !path_exists(repo, ar_args, *pathspec))
> @@ -609,6 +637,7 @@ static int parse_archive_args(int argc, const char **argv,
>  	int verbose = 0;
>  	int i;
>  	int list = 0;
> +	int recurse_submodules = 0;
>  	int worktree_attributes = 0;
>  	struct option opts[] = {
>  		OPT_GROUP(""),
> @@ -623,6 +652,8 @@ static int parse_archive_args(int argc, const char **argv,
>  		  add_file_cb, (intptr_t)&base },
>  		OPT_STRING('o', "output", &output, N_("file"),
>  			N_("write the archive to this file")),
> +		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
> +			N_("include submodules in archive")),
>  		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
>  			N_("read .gitattributes in working directory")),
>  		OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
> @@ -686,6 +717,7 @@ static int parse_archive_args(int argc, const char **argv,
>  	args->verbose = verbose;
>  	args->base = base;
>  	args->baselen = strlen(base);
> +	args->recurse_submodules = recurse_submodules;
>  	args->worktree_attributes = worktree_attributes;
>  
>  	return argc;
> diff --git a/archive.h b/archive.h
> index 540a3b12130..1b21484dda6 100644
> --- a/archive.h
> +++ b/archive.h
> @@ -18,6 +18,7 @@ struct archiver_args {
>  	timestamp_t time;
>  	struct pathspec pathspec;
>  	unsigned int verbose : 1;
> +	unsigned int recurse_submodules : 1;
>  	unsigned int worktree_attributes : 1;
>  	unsigned int convert : 1;
>  	int compression_level;
> -- 
> gitgitgadget

  reply	other threads:[~2022-10-26 23:34 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-12 17:52 [PATCH] archive: add --recurse-submodules to git-archive command Heather Lapointe via GitGitGadget
2022-10-13 11:35 ` [PATCH v2 0/2] archive: Add " Heather Lapointe via GitGitGadget
2022-10-13 11:35   ` [PATCH v2 1/2] archive: add " Alphadelta14 via GitGitGadget
2022-10-13 17:53     ` René Scharfe
2022-10-13 21:37       ` Heather Lapointe
2022-10-13 11:36   ` [PATCH v2 2/2] archive: fix a case of submodule in submodule traversal Alphadelta14 via GitGitGadget
2022-10-13 17:53   ` [PATCH v2 0/2] archive: Add --recurse-submodules to git-archive command René Scharfe
2022-10-13 21:23     ` Heather Lapointe
2022-10-14  9:47       ` René Scharfe
2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods Alphadelta14 via GitGitGadget
2022-10-17 13:26       ` Junio C Hamano
2022-10-26 22:33       ` Glen Choo
2022-10-27 18:09       ` Jonathan Tan
2022-10-27 18:50         ` Junio C Hamano
2022-10-17  2:23     ` [PATCH v3 2/9] tree: update cases to use repo_ tree methods Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 3/9] tree: increase test coverage for tree.c Heather Lapointe via GitGitGadget
2022-10-17 13:34       ` Phillip Wood
2022-10-17 13:36       ` Junio C Hamano
2022-10-27 18:28       ` Jonathan Tan
2022-10-17  2:23     ` [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly Heather Lapointe via GitGitGadget
2022-10-17 13:48       ` Phillip Wood
2022-10-17 13:56       ` Junio C Hamano
2022-10-26 22:48       ` Glen Choo
2022-10-27 18:43       ` Jonathan Tan
2022-10-17  2:23     ` [PATCH v3 5/9] tree: add repository parameter to read_tree_fn_t Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 6/9] archive: pass repo objects to write_archive handlers Heather Lapointe via GitGitGadget
2022-10-17 13:50       ` Phillip Wood
2022-10-17  2:23     ` [PATCH v3 7/9] archive: remove global repository from archive_args Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command Heather Lapointe via GitGitGadget
2022-10-26 23:34       ` Glen Choo [this message]
2022-10-27  7:09         ` René Scharfe
2022-10-27 17:29           ` Glen Choo
2022-10-27 17:30           ` Glen Choo
2022-10-27 17:33           ` Glen Choo
2022-10-17  2:23     ` [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules Heather Lapointe via GitGitGadget
2022-10-27 18:54       ` Jonathan Tan
2022-10-27 23:30         ` Glen Choo
2022-10-28  0:17       ` Ævar Arnfjörð Bjarmason
2022-10-17 13:57     ` [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command Phillip Wood
2022-10-18 18:34     ` Junio C Hamano
2022-10-18 18:48       ` Heather Lapointe
2022-10-19 16:16         ` Junio C Hamano
2022-10-19 20:44           ` Junio C Hamano
2022-10-20  1:21             ` Junio C Hamano
2022-10-21  1:43               ` Junio C Hamano
2022-10-26 22:14     ` Glen Choo
2022-10-28 18:18       ` Heather Lapointe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=kl6lh6zqqhvm.fsf@chooglen-macbookpro.roam.corp.google.com \
    --to=chooglen@google.com \
    --cc=alpha@alphaservcomputing.solutions \
    --cc=git@vger.kernel.org \
    --cc=gitgitgadget@gmail.com \
    --cc=l.s.r@web.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).