Git Mailing List Archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 04/10] sequencer: use the trailer iterator
  2024-01-31  1:22 ` [PATCH v3 " Linus Arver via GitGitGadget
@ 2024-01-31  1:22   ` Linus Arver via GitGitGadget
  2024-02-01 18:06     ` Junio C Hamano
  0 siblings, 1 reply; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-01-31  1:22 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

This patch allows for the removal of "trailer_info_get()" from the
trailer.h API, which will be in the next patch.

Instead of calling "trailer_info_get()", which is a low-level function
in the trailers implementation (trailer.c), call
trailer_iterator_advance(), which was specifically designed for public
consumption in f0939a0eb1 (trailer: add interface for iterating over
commit trailers, 2020-09-27).

Avoiding "trailer_info_get()" means we don't have to worry about options
like "no_divider" (relevant for parsing trailers). We also don't have to
check for things like "info.trailer_block_start ==
info.trailer_block_end" to see whether there were any trailers (instead
we can just check to see whether the iterator advanced at all).

Also, teach the iterator about non-trailer lines, by adding a new field
called "raw" to hold both trailer and non-trailer lines. This is
necessary because a "trailer block" is a list of lines of which at
least 25% are trailers (see 146245063e (trailer: allow non-trailers in
trailer block, 2016-10-21)), such that it may hold non-trailer lines.

Signed-off-by: Linus Arver <linusa@google.com>
---
 builtin/shortlog.c |  7 +++++--
 sequencer.c        | 35 +++++++++++++++--------------------
 trailer.c          | 17 +++++++++--------
 trailer.h          | 13 +++++++++++++
 4 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index 1307ed2b88a..dc8fd5a5532 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -172,7 +172,7 @@ static void insert_records_from_trailers(struct shortlog *log,
 					 const char *oneline)
 {
 	struct trailer_iterator iter;
-	const char *commit_buffer, *body;
+	const char *commit_buffer, *body, *value;
 	struct strbuf ident = STRBUF_INIT;
 
 	if (!log->trailers.nr)
@@ -190,7 +190,10 @@ static void insert_records_from_trailers(struct shortlog *log,
 
 	trailer_iterator_init(&iter, body);
 	while (trailer_iterator_advance(&iter)) {
-		const char *value = iter.val.buf;
+		if (!iter.is_trailer)
+			continue;
+
+		value = iter.val.buf;
 
 		if (!string_list_has_string(&log->trailers, iter.key.buf))
 			continue;
diff --git a/sequencer.c b/sequencer.c
index 3cc88d8a800..bc7c82c5271 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -319,37 +319,32 @@ static const char *get_todo_path(const struct replay_opts *opts)
 static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob,
 	size_t ignore_footer)
 {
-	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
-	struct trailer_info info;
-	size_t i;
-	int found_sob = 0, found_sob_last = 0;
-	char saved_char;
-
-	opts.no_divider = 1;
+	struct trailer_iterator iter;
+	size_t i = 0, found_sob = 0;
+	char saved_char = sb->buf[sb->len - ignore_footer];
 
 	if (ignore_footer) {
-		saved_char = sb->buf[sb->len - ignore_footer];
 		sb->buf[sb->len - ignore_footer] = '\0';
 	}
 
-	trailer_info_get(&info, sb->buf, &opts);
+	trailer_iterator_init(&iter, sb->buf);
+	while (trailer_iterator_advance(&iter)) {
+		i++;
+		if (sob &&
+		    iter.is_trailer &&
+		    !strncmp(iter.raw, sob->buf, sob->len)) {
+			found_sob = i;
+		}
+	}
+	trailer_iterator_release(&iter);
 
 	if (ignore_footer)
 		sb->buf[sb->len - ignore_footer] = saved_char;
 
-	if (info.trailer_block_start == info.trailer_block_end)
+	if (!i)
 		return 0;
 
-	for (i = 0; i < info.trailer_nr; i++)
-		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
-			found_sob = 1;
-			if (i == info.trailer_nr - 1)
-				found_sob_last = 1;
-		}
-
-	trailer_info_release(&info);
-
-	if (found_sob_last)
+	if (found_sob == i)
 		return 3;
 	if (found_sob)
 		return 2;
diff --git a/trailer.c b/trailer.c
index 71ea2bb67f8..5bcc9b0006c 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1158,17 +1158,18 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	while (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
-		int separator_pos = find_separator(trailer, separators);
-
-		if (separator_pos < 1)
-			continue; /* not a real trailer */
-
+	char *line;
+	int separator_pos;
+	if (iter->internal.cur < iter->internal.info.trailer_nr) {
+		line = iter->internal.info.trailers[iter->internal.cur++];
+		separator_pos = find_separator(line, separators);
+		iter->is_trailer = (separator_pos > 0);
+
+		iter->raw = line;
 		strbuf_reset(&iter->key);
 		strbuf_reset(&iter->val);
 		parse_trailer(&iter->key, &iter->val, NULL,
-			      trailer, separator_pos);
+			      line, separator_pos);
 		unfold_value(&iter->val);
 		return 1;
 	}
diff --git a/trailer.h b/trailer.h
index 244f29fc91f..a7599067acc 100644
--- a/trailer.h
+++ b/trailer.h
@@ -127,6 +127,19 @@ struct trailer_iterator {
 	struct strbuf key;
 	struct strbuf val;
 
+	/*
+	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
+	 * key/val pair as part of a trailer block. A trailer block can be
+	 * either 100% trailer lines, or mixed in with non-trailer lines (in
+	 * which case at least 25% must be trailer lines).
+	 */
+	const char *raw;
+
+	/*
+	 * 1 if the raw line was parsed as a trailer line (key/val pair).
+	 */
+	int is_trailer;
+
 	/* private */
 	struct {
 		struct trailer_info info;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* Re: [PATCH v3 04/10] sequencer: use the trailer iterator
  2024-01-31  1:22   ` [PATCH v3 04/10] sequencer: use the trailer iterator Linus Arver via GitGitGadget
@ 2024-02-01 18:06     ` Junio C Hamano
  2024-02-01 19:14       ` Linus Arver
  0 siblings, 1 reply; 67+ messages in thread
From: Junio C Hamano @ 2024-02-01 18:06 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Linus Arver <linusa@google.com>
>
> This patch allows for the removal of "trailer_info_get()" from the
> trailer.h API, which will be in the next patch.

Hmph, do you mean "shortlog" and the sequencer were the only two
external callers and with this we can make it file-scope static to
trailer.c?  Or do you mean the next step will be more than a removal
of a declaration from trailer.h plus adding "static" in front of its
definition in trailer.c, because there need other adjustments before
that happens?

> Instead of calling "trailer_info_get()", which is a low-level function
> in the trailers implementation (trailer.c), call
> trailer_iterator_advance(), which was specifically designed for public
> consumption in f0939a0eb1 (trailer: add interface for iterating over
> commit trailers, 2020-09-27).

;-).

> Also, teach the iterator about non-trailer lines, by adding a new field
> called "raw" to hold both trailer and non-trailer lines. This is
> necessary because a "trailer block" is a list of trailer lines of at
> least 25% trailers (see 146245063e (trailer: allow non-trailers in
> trailer block, 2016-10-21)), such that it may hold non-trailer lines.

That sounds like a task larger than something we would want in a
patch that focuses on another task (e.g. update sequencer not to
call trailer_info_get()) while at it.  It seems from a casual glance
that the change to shortlog.c is to accommodate this change in the
semantics of what the iterator could return?  It smells that this
patch does two more or less unrelated things at the same time?

> Signed-off-by: Linus Arver <linusa@google.com>
> ---
>  builtin/shortlog.c |  7 +++++--
>  sequencer.c        | 35 +++++++++++++++--------------------
>  trailer.c          | 17 +++++++++--------
>  trailer.h          | 13 +++++++++++++
>  4 files changed, 42 insertions(+), 30 deletions(-)
>
> diff --git a/builtin/shortlog.c b/builtin/shortlog.c
> index 1307ed2b88a..dc8fd5a5532 100644
> --- a/builtin/shortlog.c
> +++ b/builtin/shortlog.c
> @@ -172,7 +172,7 @@ static void insert_records_from_trailers(struct shortlog *log,
>  					 const char *oneline)
>  {
>  	struct trailer_iterator iter;
> -	const char *commit_buffer, *body;
> +	const char *commit_buffer, *body, *value;
>  	struct strbuf ident = STRBUF_INIT;
>  
>  	if (!log->trailers.nr)
> @@ -190,7 +190,10 @@ static void insert_records_from_trailers(struct shortlog *log,
>  
>  	trailer_iterator_init(&iter, body);
>  	while (trailer_iterator_advance(&iter)) {
> -		const char *value = iter.val.buf;
> +		if (!iter.is_trailer)
> +			continue;
> +
> +		value = iter.val.buf;
>  
>  		if (!string_list_has_string(&log->trailers, iter.key.buf))
>  			continue;
> diff --git a/sequencer.c b/sequencer.c
> index 3cc88d8a800..bc7c82c5271 100644
> --- a/sequencer.c
> +++ b/sequencer.c
> @@ -319,37 +319,32 @@ static const char *get_todo_path(const struct replay_opts *opts)
>  static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob,
>  	size_t ignore_footer)
>  {
> -	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
> -	struct trailer_info info;
> -	size_t i;
> -	int found_sob = 0, found_sob_last = 0;
> -	char saved_char;
> -
> -	opts.no_divider = 1;
> +	struct trailer_iterator iter;
> +	size_t i = 0, found_sob = 0;
> +	char saved_char = sb->buf[sb->len - ignore_footer];
>  
>  	if (ignore_footer) {
> -		saved_char = sb->buf[sb->len - ignore_footer];
>  		sb->buf[sb->len - ignore_footer] = '\0';
>  	}
>  
> -	trailer_info_get(&info, sb->buf, &opts);
> +	trailer_iterator_init(&iter, sb->buf);
> +	while (trailer_iterator_advance(&iter)) {
> +		i++;
> +		if (sob &&
> +		    iter.is_trailer &&
> +		    !strncmp(iter.raw, sob->buf, sob->len)) {
> +			found_sob = i;
> +		}
> +	}
> +	trailer_iterator_release(&iter);
>  
>  	if (ignore_footer)
>  		sb->buf[sb->len - ignore_footer] = saved_char;
>  
> -	if (info.trailer_block_start == info.trailer_block_end)
> +	if (!i)
>  		return 0;
>  
> -	for (i = 0; i < info.trailer_nr; i++)
> -		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
> -			found_sob = 1;
> -			if (i == info.trailer_nr - 1)
> -				found_sob_last = 1;
> -		}
> -
> -	trailer_info_release(&info);
> -
> -	if (found_sob_last)
> +	if (found_sob == i)
>  		return 3;
>  	if (found_sob)
>  		return 2;
> diff --git a/trailer.c b/trailer.c
> index 71ea2bb67f8..5bcc9b0006c 100644
> --- a/trailer.c
> +++ b/trailer.c
> @@ -1158,17 +1158,18 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
>  
>  int trailer_iterator_advance(struct trailer_iterator *iter)
>  {
> -	while (iter->internal.cur < iter->internal.info.trailer_nr) {
> -		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
> -		int separator_pos = find_separator(trailer, separators);
> -
> -		if (separator_pos < 1)
> -			continue; /* not a real trailer */
> -
> +	char *line;
> +	int separator_pos;
> +	if (iter->internal.cur < iter->internal.info.trailer_nr) {
> +		line = iter->internal.info.trailers[iter->internal.cur++];
> +		separator_pos = find_separator(line, separators);
> +		iter->is_trailer = (separator_pos > 0);
> +
> +		iter->raw = line;
>  		strbuf_reset(&iter->key);
>  		strbuf_reset(&iter->val);
>  		parse_trailer(&iter->key, &iter->val, NULL,
> -			      trailer, separator_pos);
> +			      line, separator_pos);
>  		unfold_value(&iter->val);
>  		return 1;
>  	}
> diff --git a/trailer.h b/trailer.h
> index 244f29fc91f..a7599067acc 100644
> --- a/trailer.h
> +++ b/trailer.h
> @@ -127,6 +127,19 @@ struct trailer_iterator {
>  	struct strbuf key;
>  	struct strbuf val;
>  
> +	/*
> +	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
> +	 * key/val pair as part of a trailer block. A trailer block can be
> +	 * either 100% trailer lines, or mixed in with non-trailer lines (in
> +	 * which case at least 25% must be trailer lines).
> +	 */
> +	const char *raw;
> +
> +	/*
> +	 * 1 if the raw line was parsed as a trailer line (key/val pair).
> +	 */
> +	int is_trailer;
> +
>  	/* private */
>  	struct {
>  		struct trailer_info info;

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v3 04/10] sequencer: use the trailer iterator
  2024-02-01 18:06     ` Junio C Hamano
@ 2024-02-01 19:14       ` Linus Arver
  2024-02-03  0:39         ` Linus Arver
  0 siblings, 1 reply; 67+ messages in thread
From: Linus Arver @ 2024-02-01 19:14 UTC (permalink / raw)
  To: Junio C Hamano, Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker

Junio C Hamano <gitster@pobox.com> writes:

> "Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> From: Linus Arver <linusa@google.com>
>>
>> This patch allows for the removal of "trailer_info_get()" from the
>> trailer.h API, which will be in the next patch.
>
> Hmph, do you mean "shortlog" and the sequencer were the only two
> external callers and with this we can make it file-scope static to
> trailer.c?

This was what I meant (originally ...

> Or do you mean the next step will be more than a removal
> of a declaration from trailer.h plus adding "static" in front of its
> definition in trailer.c, because there need other adjustments before
> that happens?

... but now I realize that the operation adds a few small tweaks, such
as tweaking the parameters it expects and also what it returns). In the
spirit of breaking up patch 3, I will break this up into
preparatory patches as well.

>> Also, teach the iterator about non-trailer lines, by adding a new field
>> called "raw" to hold both trailer and non-trailer lines. This is
>> necessary because a "trailer block" is a list of trailer lines of at
>> least 25% trailers (see 146245063e (trailer: allow non-trailers in
>> trailer block, 2016-10-21)), such that it may hold non-trailer lines.
>
> That sounds like a task larger than something we would want in a
> patch that focuses on another task (e.g. update sequencer not to
> call trailer_info_get()) while at it.  It seems from a casual glance
> that the change to shortlog.c is to accommodate this change in the
> semantics of what the iterator could return?  It smells that this
> patch does two more or less unrelated things at the same time?

I think you're correct. Hopefully breaking this up will make things
easier to review.

I am learning very quickly from your review comments in patch 3 and in
here that, in the absence of area experts, the existing tests/CI tests
cannot be trusted alone (after all some tests could be missing), which
makes it more important to do so-called "micro-commits".

But overall, breaking things up is a good thing anyway as a general
practice, so, I think this is a good lesson. TBH I have a (bad) habit of
saying "is the diff ~100 lines?" and if so I don't spend any time
thinking of breaking these up. X-<

Thanks.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v3 04/10] sequencer: use the trailer iterator
  2024-02-01 19:14       ` Linus Arver
@ 2024-02-03  0:39         ` Linus Arver
  0 siblings, 0 replies; 67+ messages in thread
From: Linus Arver @ 2024-02-03  0:39 UTC (permalink / raw)
  To: Junio C Hamano, Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker

Linus Arver <linusa@google.com> writes:

> Junio C Hamano <gitster@pobox.com> writes:
>
>> "Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:
>>
>>> From: Linus Arver <linusa@google.com>
>>>
>>> Also, teach the iterator about non-trailer lines, by adding a new field
>>> called "raw" to hold both trailer and non-trailer lines. This is
>>> necessary because a "trailer block" is a list of trailer lines of at
>>> least 25% trailers (see 146245063e (trailer: allow non-trailers in
>>> trailer block, 2016-10-21)), such that it may hold non-trailer lines.
>>
>> That sounds like a task larger than something we would want in a
>> patch that focuses on another task (e.g. update sequencer not to
>> call trailer_info_get()) while at it.  It seems from a casual glance
>> that the change to shortlog.c is to accommodate this change in the
>> semantics of what the iterator could return?  It smells that this
>> patch does two more or less unrelated things at the same time?
>
> I think you're correct. Hopefully breaking this up will make things
> easier to review.

And now that I've broken it up locally, I can say that the change I made
to shortlog was unnecessary (shortlog already has a check to see if the
trailer key is empty) which makes the "is_trailer" check I added to it
here redundant (because non-trailer lines, which the new iterator can
iterate over, have empty keys).

Will remove the shortlog change in v4. Thanks.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup)
@ 2024-03-16  6:27 Linus Arver via GitGitGadget
  2024-03-16  6:27 ` [PATCH 1/6] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
                   ` (8 more replies)
  0 siblings, 9 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-03-16  6:27 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

NOTE: This series is based on the la/format-trailer-info topic branch (see
its discussion at [1]).

This series is based on the initial series [2], notably the v4 version of
patches 17-20 as suggested by Christian [3]. This version addresses the
review comments for those patches, namely the splitting up of Patch 19 there
into 3 separate patches [4] (as Patches 03-05 here).

The central idea is to make the trailer_info struct private (that is, move
its definition from trailer.h to trailer.c) --- aka the "pimpl" idiom. See
the detailed commit message for Patch 05 for the motivation behind the
change.

Patch 02 makes sequencer.c a well-behaved trailer API consumer, by making
use of the trailer iterator. Patch 01 prepares us for Patch 02. Patch 06
slightly reduces the weight of the API by removing (from the API surface) an
unused function.

[1]
https://lore.kernel.org/git/pull.1694.git.1710485706.gitgitgadget@gmail.com/
[2]
https://lore.kernel.org/git/pull.1632.v4.git.1707196348.gitgitgadget@gmail.com/
[3]
https://lore.kernel.org/git/CAP8UFD08F0V13X0+CJ1uhMPzPWVMs2okGVMJch0DkQg5M3BWLA@mail.gmail.com/
[4]
https://lore.kernel.org/git/CAP8UFD1twELGKvvesxgCrZrypKZpgSt04ira3mvurG1UbpDfxQ@mail.gmail.com/

Linus Arver (6):
  trailer: teach iterator about non-trailer lines
  sequencer: use the trailer iterator
  interpret-trailers: access trailer_info with new helpers
  trailer: make parse_trailers() return trailer_info pointer
  trailer: make trailer_info struct private
  trailer: retire trailer_info_get() from API

 builtin/interpret-trailers.c |  12 +--
 sequencer.c                  |  27 +++---
 trailer.c                    | 161 ++++++++++++++++++++++-------------
 trailer.h                    |  46 ++++------
 4 files changed, 137 insertions(+), 109 deletions(-)


base-commit: 3452d173241c8b87ecdd67f91f594cb14327e394
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1696%2Flistx%2Ftrailer-api-part-3-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1696/listx/trailer-api-part-3-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/1696
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 67+ messages in thread

* [PATCH 1/6] trailer: teach iterator about non-trailer lines
  2024-03-16  6:27 [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Linus Arver via GitGitGadget
@ 2024-03-16  6:27 ` Linus Arver via GitGitGadget
  2024-03-16  6:27 ` [PATCH 2/6] sequencer: use the trailer iterator Linus Arver via GitGitGadget
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-03-16  6:27 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Previously the iterator did not iterate over non-trailer lines. This was
somewhat unfortunate, because trailer blocks could have non-trailer
lines in them since 146245063e (trailer: allow non-trailers in trailer
block, 2016-10-21), which was before the iterator was created in
f0939a0eb1 (trailer: add interface for iterating over commit trailers,
2020-09-27).

So if trailer API users wanted to iterate over all lines in a trailer
block (including non-trailer lines), they could not use the iterator and
were forced to use the lower-level trailer_info struct directly (which
provides a raw string array that includes all lines in the trailer
block).

Change the iterator's behavior so that we also iterate over non-trailer
lines, instead of skipping over them. The new "raw" member of the
iterator allows API users to access previously inaccessible non-trailer
lines. Reword the variable "trailer" to just "line" because this
variable can now hold both trailer lines _and_ non-trailer lines.

The new "raw" member is important because anyone currently not using the
iterator is using trailer_info's raw string array directly to access
lines to check what the combined key + value looks like. If we didn't
provide a "raw" member here, iterator users would have to re-construct
the unparsed line by concatenating the key and value back together again
--- which places an undue burden on iterator users.

The next commit demonstrates the use of the iterator in sequencer.c as an
example of where "raw" will be useful, so that it can start using the
iterator.

For the existing use of the iterator in builtin/shortlog.c, we don't
have to change the code there because that code does

    trailer_iterator_init(&iter, body);
    while (trailer_iterator_advance(&iter)) {
        const char *value = iter.val.buf;

        if (!string_list_has_string(&log->trailers, iter.key.buf))
            continue;

        ...

and the

        if (!string_list_has_string(&log->trailers, iter.key.buf))

condition already skips over non-trailer lines (iter.key.buf is empty
for non-trailer lines, making the comparison still work even with this
commit).

Signed-off-by: Linus Arver <linusa@google.com>
---
 trailer.c | 12 +++++-------
 trailer.h |  8 ++++++++
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/trailer.c b/trailer.c
index 3e4dab9c065..4700c441442 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1146,17 +1146,15 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	while (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
-		int separator_pos = find_separator(trailer, separators);
-
-		if (separator_pos < 1)
-			continue; /* not a real trailer */
+	if (iter->internal.cur < iter->internal.info.trailer_nr) {
+		char *line = iter->internal.info.trailers[iter->internal.cur++];
+		int separator_pos = find_separator(line, separators);
 
+		iter->raw = line;
 		strbuf_reset(&iter->key);
 		strbuf_reset(&iter->val);
 		parse_trailer(&iter->key, &iter->val, NULL,
-			      trailer, separator_pos);
+			      line, separator_pos);
 		/* Always unfold values during iteration. */
 		unfold_value(&iter->val);
 		return 1;
diff --git a/trailer.h b/trailer.h
index 9f42aa75994..ebafa3657e4 100644
--- a/trailer.h
+++ b/trailer.h
@@ -125,6 +125,14 @@ void format_trailers_from_commit(const struct process_trailer_options *,
  *   trailer_iterator_release(&iter);
  */
 struct trailer_iterator {
+	/*
+	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
+	 * key/val pair as part of a trailer block. A trailer block can be
+	 * either 100% trailer lines, or mixed in with non-trailer lines (in
+	 * which case at least 25% must be trailer lines).
+	 */
+	const char *raw;
+
 	struct strbuf key;
 	struct strbuf val;
 
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH 2/6] sequencer: use the trailer iterator
  2024-03-16  6:27 [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Linus Arver via GitGitGadget
  2024-03-16  6:27 ` [PATCH 1/6] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
@ 2024-03-16  6:27 ` Linus Arver via GitGitGadget
  2024-03-16  6:27 ` [PATCH 3/6] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-03-16  6:27 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Instead of calling "trailer_info_get()", which is a low-level function
in the trailers implementation (trailer.c), call
trailer_iterator_advance(), which was specifically designed for public
consumption in f0939a0eb1 (trailer: add interface for iterating over
commit trailers, 2020-09-27).

Avoiding "trailer_info_get()" means we don't have to worry about options
like "no_divider" (relevant for parsing trailers). We also don't have to
check for things like "info.trailer_block_start ==
info.trailer_block_end" to see whether there were any trailers (instead
we can just check to see whether the iterator advanced at all).

Note how we have to use "iter.raw" in order to get the same behavior as
before when we iterated over the unparsed string array (char **trailers)
in trailer_info.

Signed-off-by: Linus Arver <linusa@google.com>
---
 sequencer.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/sequencer.c b/sequencer.c
index ea1441e6174..4c1f6c675e7 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -319,35 +319,32 @@ static const char *get_todo_path(const struct replay_opts *opts)
 static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob,
 	size_t ignore_footer)
 {
-	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
-	struct trailer_info info;
-	size_t i;
+	struct trailer_iterator iter;
+	size_t i = 0;
 	int found_sob = 0, found_sob_last = 0;
 	char saved_char;
 
-	opts.no_divider = 1;
-
 	if (ignore_footer) {
 		saved_char = sb->buf[sb->len - ignore_footer];
 		sb->buf[sb->len - ignore_footer] = '\0';
 	}
 
-	trailer_info_get(&opts, sb->buf, &info);
+	trailer_iterator_init(&iter, sb->buf);
 
 	if (ignore_footer)
 		sb->buf[sb->len - ignore_footer] = saved_char;
 
-	if (info.trailer_block_start == info.trailer_block_end)
-		return 0;
+	while (trailer_iterator_advance(&iter)) {
+		i++;
+		if (sob && !strncmp(iter.raw, sob->buf, sob->len))
+			found_sob = i;
+	}
+	trailer_iterator_release(&iter);
 
-	for (i = 0; i < info.trailer_nr; i++)
-		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
-			found_sob = 1;
-			if (i == info.trailer_nr - 1)
-				found_sob_last = 1;
-		}
+	if (!i)
+		return 0;
 
-	trailer_info_release(&info);
+	found_sob_last = (int)i == found_sob;
 
 	if (found_sob_last)
 		return 3;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH 3/6] interpret-trailers: access trailer_info with new helpers
  2024-03-16  6:27 [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Linus Arver via GitGitGadget
  2024-03-16  6:27 ` [PATCH 1/6] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
  2024-03-16  6:27 ` [PATCH 2/6] sequencer: use the trailer iterator Linus Arver via GitGitGadget
@ 2024-03-16  6:27 ` Linus Arver via GitGitGadget
  2024-03-16  6:27 ` [PATCH 4/6] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-03-16  6:27 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Instead of directly accessing trailer_info members, access them
indirectly through new helper functions exposed by the trailer API.

This is the first of two preparatory commits which will allow us to
use the so-called "pimpl" (pointer to implementation) idiom for the
trailer API, by making the trailer_info struct private to the trailer
implementation (and thus hidden from the API).

Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linusa@google.com>
---
 builtin/interpret-trailers.c | 12 ++++++------
 trailer.c                    | 21 +++++++++++++++++++++
 trailer.h                    |  4 ++++
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c
index 11f4ce9e4a2..f3240682e35 100644
--- a/builtin/interpret-trailers.c
+++ b/builtin/interpret-trailers.c
@@ -141,7 +141,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	LIST_HEAD(head);
 	struct strbuf sb = STRBUF_INIT;
 	struct strbuf trailer_block = STRBUF_INIT;
-	struct trailer_info info;
+	struct trailer_info *info = trailer_info_new();
 	FILE *outfile = stdout;
 
 	trailer_config_init();
@@ -151,13 +151,13 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	if (opts->in_place)
 		outfile = create_in_place_tempfile(file);
 
-	parse_trailers(opts, &info, sb.buf, &head);
+	parse_trailers(opts, info, sb.buf, &head);
 
 	/* Print the lines before the trailers */
 	if (!opts->only_trailers)
-		fwrite(sb.buf, 1, info.trailer_block_start, outfile);
+		fwrite(sb.buf, 1, trailer_block_start(info), outfile);
 
-	if (!opts->only_trailers && !info.blank_line_before_trailer)
+	if (!opts->only_trailers && !blank_line_before_trailer_block(info))
 		fprintf(outfile, "\n");
 
 
@@ -178,8 +178,8 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 
 	/* Print the lines after the trailers as is */
 	if (!opts->only_trailers)
-		fwrite(sb.buf + info.trailer_block_end, 1, sb.len - info.trailer_block_end, outfile);
-	trailer_info_release(&info);
+		fwrite(sb.buf + trailer_block_end(info), 1, sb.len - trailer_block_end(info), outfile);
+	trailer_info_release(info);
 
 	if (opts->in_place)
 		if (rename_tempfile(&trailers_tempfile, file))
diff --git a/trailer.c b/trailer.c
index 4700c441442..95b4c9b8f19 100644
--- a/trailer.c
+++ b/trailer.c
@@ -952,6 +952,12 @@ static void unfold_value(struct strbuf *val)
 	strbuf_release(&out);
 }
 
+struct trailer_info *trailer_info_new(void)
+{
+	struct trailer_info *info = xcalloc(1, sizeof(*info));
+	return info;
+}
+
 /*
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
@@ -1000,6 +1006,21 @@ void free_trailers(struct list_head *trailers)
 	}
 }
 
+size_t trailer_block_start(struct trailer_info *info)
+{
+	return info->trailer_block_start;
+}
+
+size_t trailer_block_end(struct trailer_info *info)
+{
+	return info->trailer_block_end;
+}
+
+int blank_line_before_trailer_block(struct trailer_info *info)
+{
+	return info->blank_line_before_trailer;
+}
+
 void trailer_info_get(const struct process_trailer_options *opts,
 		      const char *str,
 		      struct trailer_info *info)
diff --git a/trailer.h b/trailer.h
index ebafa3657e4..9ac4be853c5 100644
--- a/trailer.h
+++ b/trailer.h
@@ -97,6 +97,10 @@ void parse_trailers(const struct process_trailer_options *,
 void trailer_info_get(const struct process_trailer_options *,
 		      const char *str,
 		      struct trailer_info *);
+size_t trailer_block_start(struct trailer_info *);
+size_t trailer_block_end(struct trailer_info *);
+int blank_line_before_trailer_block(struct trailer_info *);
+struct trailer_info *trailer_info_new(void);
 
 void trailer_info_release(struct trailer_info *info);
 
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH 4/6] trailer: make parse_trailers() return trailer_info pointer
  2024-03-16  6:27 [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Linus Arver via GitGitGadget
                   ` (2 preceding siblings ...)
  2024-03-16  6:27 ` [PATCH 3/6] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
@ 2024-03-16  6:27 ` Linus Arver via GitGitGadget
  2024-03-16  6:27 ` [PATCH 5/6] trailer: make trailer_info struct private Linus Arver via GitGitGadget
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-03-16  6:27 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

This is the second and final preparatory commit for making the
trailer_info struct private to the trailer implementation.

Make trailer_info_get() do the actual work of allocating a new
trailer_info struct, and return a pointer to it. Because
parse_trailers() wraps around trailer_info_get(), it too can return this
pointer to the caller. From the trailer API user's perspective, the call
to trailer_info_new() can be replaced with parse_trailers(); do so in
interpret-trailers.

Because trailer_info_new() is no longer called by interpret-trailers,
remove this function from the trailer API.

With this change, we no longer allocate trailer_info on the stack ---
all uses of it are via a pointer where the actual data is always
allocated at runtime through trailer_info_new(). Make
trailer_info_release() free this dynamically allocated memory.

Finally, due to the way the function signatures of parse_trailers() and
trailer_info_get() have changed, update the callsites in
format_trailers_from_commit() and trailer_iterator_init() accordingly.

Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linusa@google.com>
---
 builtin/interpret-trailers.c |  4 ++--
 trailer.c                    | 41 +++++++++++++++++++-----------------
 trailer.h                    | 17 ++++++---------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c
index f3240682e35..6bf8cec005a 100644
--- a/builtin/interpret-trailers.c
+++ b/builtin/interpret-trailers.c
@@ -141,7 +141,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	LIST_HEAD(head);
 	struct strbuf sb = STRBUF_INIT;
 	struct strbuf trailer_block = STRBUF_INIT;
-	struct trailer_info *info = trailer_info_new();
+	struct trailer_info *info;
 	FILE *outfile = stdout;
 
 	trailer_config_init();
@@ -151,7 +151,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	if (opts->in_place)
 		outfile = create_in_place_tempfile(file);
 
-	parse_trailers(opts, info, sb.buf, &head);
+	info = parse_trailers(opts, sb.buf, &head);
 
 	/* Print the lines before the trailers */
 	if (!opts->only_trailers)
diff --git a/trailer.c b/trailer.c
index 95b4c9b8f19..9179dd802c6 100644
--- a/trailer.c
+++ b/trailer.c
@@ -952,7 +952,7 @@ static void unfold_value(struct strbuf *val)
 	strbuf_release(&out);
 }
 
-struct trailer_info *trailer_info_new(void)
+static struct trailer_info *trailer_info_new(void)
 {
 	struct trailer_info *info = xcalloc(1, sizeof(*info));
 	return info;
@@ -962,16 +962,16 @@ struct trailer_info *trailer_info_new(void)
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
  */
-void parse_trailers(const struct process_trailer_options *opts,
-		    struct trailer_info *info,
-		    const char *str,
-		    struct list_head *head)
+struct trailer_info *parse_trailers(const struct process_trailer_options *opts,
+				    const char *str,
+				    struct list_head *head)
 {
+	struct trailer_info *info;
 	struct strbuf tok = STRBUF_INIT;
 	struct strbuf val = STRBUF_INIT;
 	size_t i;
 
-	trailer_info_get(opts, str, info);
+	info = trailer_info_get(opts, str);
 
 	for (i = 0; i < info->trailer_nr; i++) {
 		int separator_pos;
@@ -995,6 +995,8 @@ void parse_trailers(const struct process_trailer_options *opts,
 					 strbuf_detach(&val, NULL));
 		}
 	}
+
+	return info;
 }
 
 void free_trailers(struct list_head *trailers)
@@ -1021,10 +1023,10 @@ int blank_line_before_trailer_block(struct trailer_info *info)
 	return info->blank_line_before_trailer;
 }
 
-void trailer_info_get(const struct process_trailer_options *opts,
-		      const char *str,
-		      struct trailer_info *info)
+struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
+				      const char *str)
 {
+	struct trailer_info *info = trailer_info_new();
 	size_t end_of_log_message = 0, trailer_block_start = 0;
 	struct strbuf **trailer_lines, **ptr;
 	char **trailer_strings = NULL;
@@ -1063,6 +1065,8 @@ void trailer_info_get(const struct process_trailer_options *opts,
 	info->trailer_block_end = end_of_log_message;
 	info->trailers = trailer_strings;
 	info->trailer_nr = nr;
+
+	return info;
 }
 
 void trailer_info_release(struct trailer_info *info)
@@ -1071,6 +1075,7 @@ void trailer_info_release(struct trailer_info *info)
 	for (i = 0; i < info->trailer_nr; i++)
 		free(info->trailers[i]);
 	free(info->trailers);
+	free(info);
 }
 
 void format_trailers(const struct process_trailer_options *opts,
@@ -1138,21 +1143,19 @@ void format_trailers_from_commit(const struct process_trailer_options *opts,
 				 struct strbuf *out)
 {
 	LIST_HEAD(trailer_objects);
-	struct trailer_info info;
-
-	parse_trailers(opts, &info, msg, &trailer_objects);
+	struct trailer_info *info = parse_trailers(opts, msg, &trailer_objects);
 
 	/* If we want the whole block untouched, we can take the fast path. */
 	if (!opts->only_trailers && !opts->unfold && !opts->filter &&
 	    !opts->separator && !opts->key_only && !opts->value_only &&
 	    !opts->key_value_separator) {
-		strbuf_add(out, msg + info.trailer_block_start,
-			   info.trailer_block_end - info.trailer_block_start);
+		strbuf_add(out, msg + info->trailer_block_start,
+			   info->trailer_block_end - info->trailer_block_start);
 	} else
 		format_trailers(opts, &trailer_objects, out);
 
 	free_trailers(&trailer_objects);
-	trailer_info_release(&info);
+	trailer_info_release(info);
 }
 
 void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
@@ -1161,14 +1164,14 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 	strbuf_init(&iter->key, 0);
 	strbuf_init(&iter->val, 0);
 	opts.no_divider = 1;
-	trailer_info_get(&opts, msg, &iter->internal.info);
+	iter->internal.info = trailer_info_get(&opts, msg);
 	iter->internal.cur = 0;
 }
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	if (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *line = iter->internal.info.trailers[iter->internal.cur++];
+	if (iter->internal.cur < iter->internal.info->trailer_nr) {
+		char *line = iter->internal.info->trailers[iter->internal.cur++];
 		int separator_pos = find_separator(line, separators);
 
 		iter->raw = line;
@@ -1185,7 +1188,7 @@ int trailer_iterator_advance(struct trailer_iterator *iter)
 
 void trailer_iterator_release(struct trailer_iterator *iter)
 {
-	trailer_info_release(&iter->internal.info);
+	trailer_info_release(iter->internal.info);
 	strbuf_release(&iter->val);
 	strbuf_release(&iter->key);
 }
diff --git a/trailer.h b/trailer.h
index 9ac4be853c5..b32213a9e23 100644
--- a/trailer.h
+++ b/trailer.h
@@ -89,18 +89,15 @@ void parse_trailers_from_command_line_args(struct list_head *arg_head,
 void process_trailers_lists(struct list_head *head,
 			    struct list_head *arg_head);
 
-void parse_trailers(const struct process_trailer_options *,
-		    struct trailer_info *,
-		    const char *str,
-		    struct list_head *head);
-
-void trailer_info_get(const struct process_trailer_options *,
-		      const char *str,
-		      struct trailer_info *);
+struct trailer_info *parse_trailers(const struct process_trailer_options *,
+				    const char *str,
+				    struct list_head *head);
+struct trailer_info *trailer_info_get(const struct process_trailer_options *,
+				      const char *str);
+
 size_t trailer_block_start(struct trailer_info *);
 size_t trailer_block_end(struct trailer_info *);
 int blank_line_before_trailer_block(struct trailer_info *);
-struct trailer_info *trailer_info_new(void);
 
 void trailer_info_release(struct trailer_info *info);
 
@@ -142,7 +139,7 @@ struct trailer_iterator {
 
 	/* private */
 	struct {
-		struct trailer_info info;
+		struct trailer_info *info;
 		size_t cur;
 	} internal;
 };
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH 5/6] trailer: make trailer_info struct private
  2024-03-16  6:27 [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Linus Arver via GitGitGadget
                   ` (3 preceding siblings ...)
  2024-03-16  6:27 ` [PATCH 4/6] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
@ 2024-03-16  6:27 ` Linus Arver via GitGitGadget
  2024-03-16  6:27 ` [PATCH 6/6] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-03-16  6:27 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

In 13211ae23f (trailer: separate public from internal portion of
trailer_iterator, 2023-09-09) we moved trailer_info behind an anonymous
struct to discourage use by trailer.h API users. However it still left
open the possibility of external use of trailer_info itself. Now that
there are no external users of trailer_info, we can make this struct
private.

Make this struct private by putting its definition inside trailer.c.
This has two benefits:

  (1) it makes the surface area of the public facing
      interface (trailer.h) smaller, and

  (2) external API users are unable to peer inside this struct (because
      it is only ever exposed as an opaque pointer).

There are a couple disadvantages:

  (A) every time the member of the struct is accessed an extra pointer
      dereference must be done, and

  (B) for users of trailer_info outside trailer.c, this struct can no
      longer be allocated on the stack and may only be allocated on the
      heap (because its definition is hidden away in trailer.c) and
      appropriately deallocated by the user.

(The disadvantages have already been observed in the two preparatory
commits that precede this one.) This commit believes that the benefits
outweigh the disadvantages for designing APIs, as explained below.

Making trailer_info private exposes existing deficiencies in the API.
This is because users of this struct had full access to its internals,
so there wasn't much need to actually design it to be "complete" in the
sense that API users only needed to use what was provided by the API.
For example, the location of the trailer block (start/end offsets
relative to the start of the input text) was accessible by looking at
these struct members directly. Now that the struct is private, we have
to expose new API functions to allow clients to access this
information (see builtin/interpret-trailers.c).

The idea in this commit to hide implementation details behind an "opaque
pointer" is also known as the "pimpl" (pointer to implementation) idiom
in C++ and is a common pattern in that language (where, for example,
abstract classes only have pointers to concrete classes).

However, the original inspiration to use this idiom does not come from
C++, but instead the book "C Interfaces and Implementations: Techniques
for Creating Reusable Software" [1]. This book recommends opaque
pointers as a good design principle for designing C libraries, using the
term "interface" as the functions defined in *.h (header) files and
"implementation" as the corresponding *.c files which define the
interfaces.

The book says this about opaque pointers:

    ... clients can manipulate such pointers freely, but they can’t
    dereference them; that is, they can’t look at the innards of the
    structure pointed to by them. Only the implementation has that
    privilege. Opaque pointers hide representation details and help
    catch errors.

In our case, "struct trailer_info" is now hidden from clients, and the
ways in which this opaque pointer can be used are limited to the richness
of <trailer.h>. In other words, <trailer.h> exclusively controls exactly
how "trailer_info" pointers are to be used.

[1] Hanson, David R. "C Interfaces and Implementations: Techniques for
    Creating Reusable Software". Addison Wesley, 1997. p. 22

Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linusa@google.com>
---
 trailer.c | 21 +++++++++++++++++++++
 trailer.h | 23 ++---------------------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/trailer.c b/trailer.c
index 9179dd802c6..6167b707ae0 100644
--- a/trailer.c
+++ b/trailer.c
@@ -11,6 +11,27 @@
  * Copyright (c) 2013, 2014 Christian Couder <chriscool@tuxfamily.org>
  */
 
+struct trailer_info {
+	/*
+	 * True if there is a blank line before the location pointed to by
+	 * trailer_block_start.
+	 */
+	int blank_line_before_trailer;
+
+	/*
+	 * Offsets to the trailer block start and end positions in the input
+	 * string. If no trailer block is found, these are both set to the
+	 * "true" end of the input (find_end_of_log_message()).
+	 */
+	size_t trailer_block_start, trailer_block_end;
+
+	/*
+	 * Array of trailers found.
+	 */
+	char **trailers;
+	size_t trailer_nr;
+};
+
 struct conf_info {
 	char *name;
 	char *key;
diff --git a/trailer.h b/trailer.h
index b32213a9e23..a63e97a2663 100644
--- a/trailer.h
+++ b/trailer.h
@@ -4,6 +4,8 @@
 #include "list.h"
 #include "strbuf.h"
 
+struct trailer_info;
+
 enum trailer_where {
 	WHERE_DEFAULT,
 	WHERE_END,
@@ -29,27 +31,6 @@ int trailer_set_where(enum trailer_where *item, const char *value);
 int trailer_set_if_exists(enum trailer_if_exists *item, const char *value);
 int trailer_set_if_missing(enum trailer_if_missing *item, const char *value);
 
-struct trailer_info {
-	/*
-	 * True if there is a blank line before the location pointed to by
-	 * trailer_block_start.
-	 */
-	int blank_line_before_trailer;
-
-	/*
-	 * Offsets to the trailer block start and end positions in the input
-	 * string. If no trailer block is found, these are both set to the
-	 * "true" end of the input (find_end_of_log_message()).
-	 */
-	size_t trailer_block_start, trailer_block_end;
-
-	/*
-	 * Array of trailers found.
-	 */
-	char **trailers;
-	size_t trailer_nr;
-};
-
 /*
  * A list that represents newly-added trailers, such as those provided
  * with the --trailer command line option of git-interpret-trailers.
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH 6/6] trailer: retire trailer_info_get() from API
  2024-03-16  6:27 [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Linus Arver via GitGitGadget
                   ` (4 preceding siblings ...)
  2024-03-16  6:27 ` [PATCH 5/6] trailer: make trailer_info struct private Linus Arver via GitGitGadget
@ 2024-03-16  6:27 ` Linus Arver via GitGitGadget
  2024-03-16 17:06 ` [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Junio C Hamano
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-03-16  6:27 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Make trailer_info_get() "static" to be file-scoped to trailer.c, because
no one outside of trailer.c uses it. Remove its declaration from
<trailer.h>.

We have to also reposition it to be above parse_trailers(), which
depends on it.

Signed-off-by: Linus Arver <linusa@google.com>
---
 trailer.c | 92 +++++++++++++++++++++++++++----------------------------
 trailer.h |  2 --
 2 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/trailer.c b/trailer.c
index 6167b707ae0..33b6aa7e8bd 100644
--- a/trailer.c
+++ b/trailer.c
@@ -979,6 +979,52 @@ static struct trailer_info *trailer_info_new(void)
 	return info;
 }
 
+static struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
+					     const char *str)
+{
+	struct trailer_info *info = trailer_info_new();
+	size_t end_of_log_message = 0, trailer_block_start = 0;
+	struct strbuf **trailer_lines, **ptr;
+	char **trailer_strings = NULL;
+	size_t nr = 0, alloc = 0;
+	char **last = NULL;
+
+	trailer_config_init();
+
+	end_of_log_message = find_end_of_log_message(str, opts->no_divider);
+	trailer_block_start = find_trailer_block_start(str, end_of_log_message);
+
+	trailer_lines = strbuf_split_buf(str + trailer_block_start,
+					 end_of_log_message - trailer_block_start,
+					 '\n',
+					 0);
+	for (ptr = trailer_lines; *ptr; ptr++) {
+		if (last && isspace((*ptr)->buf[0])) {
+			struct strbuf sb = STRBUF_INIT;
+			strbuf_attach(&sb, *last, strlen(*last), strlen(*last));
+			strbuf_addbuf(&sb, *ptr);
+			*last = strbuf_detach(&sb, NULL);
+			continue;
+		}
+		ALLOC_GROW(trailer_strings, nr + 1, alloc);
+		trailer_strings[nr] = strbuf_detach(*ptr, NULL);
+		last = find_separator(trailer_strings[nr], separators) >= 1
+			? &trailer_strings[nr]
+			: NULL;
+		nr++;
+	}
+	strbuf_list_free(trailer_lines);
+
+	info->blank_line_before_trailer = ends_with_blank_line(str,
+							       trailer_block_start);
+	info->trailer_block_start = trailer_block_start;
+	info->trailer_block_end = end_of_log_message;
+	info->trailers = trailer_strings;
+	info->trailer_nr = nr;
+
+	return info;
+}
+
 /*
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
@@ -1044,52 +1090,6 @@ int blank_line_before_trailer_block(struct trailer_info *info)
 	return info->blank_line_before_trailer;
 }
 
-struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
-				      const char *str)
-{
-	struct trailer_info *info = trailer_info_new();
-	size_t end_of_log_message = 0, trailer_block_start = 0;
-	struct strbuf **trailer_lines, **ptr;
-	char **trailer_strings = NULL;
-	size_t nr = 0, alloc = 0;
-	char **last = NULL;
-
-	trailer_config_init();
-
-	end_of_log_message = find_end_of_log_message(str, opts->no_divider);
-	trailer_block_start = find_trailer_block_start(str, end_of_log_message);
-
-	trailer_lines = strbuf_split_buf(str + trailer_block_start,
-					 end_of_log_message - trailer_block_start,
-					 '\n',
-					 0);
-	for (ptr = trailer_lines; *ptr; ptr++) {
-		if (last && isspace((*ptr)->buf[0])) {
-			struct strbuf sb = STRBUF_INIT;
-			strbuf_attach(&sb, *last, strlen(*last), strlen(*last));
-			strbuf_addbuf(&sb, *ptr);
-			*last = strbuf_detach(&sb, NULL);
-			continue;
-		}
-		ALLOC_GROW(trailer_strings, nr + 1, alloc);
-		trailer_strings[nr] = strbuf_detach(*ptr, NULL);
-		last = find_separator(trailer_strings[nr], separators) >= 1
-			? &trailer_strings[nr]
-			: NULL;
-		nr++;
-	}
-	strbuf_list_free(trailer_lines);
-
-	info->blank_line_before_trailer = ends_with_blank_line(str,
-							       trailer_block_start);
-	info->trailer_block_start = trailer_block_start;
-	info->trailer_block_end = end_of_log_message;
-	info->trailers = trailer_strings;
-	info->trailer_nr = nr;
-
-	return info;
-}
-
 void trailer_info_release(struct trailer_info *info)
 {
 	size_t i;
diff --git a/trailer.h b/trailer.h
index a63e97a2663..1b7422fa2b0 100644
--- a/trailer.h
+++ b/trailer.h
@@ -73,8 +73,6 @@ void process_trailers_lists(struct list_head *head,
 struct trailer_info *parse_trailers(const struct process_trailer_options *,
 				    const char *str,
 				    struct list_head *head);
-struct trailer_info *trailer_info_get(const struct process_trailer_options *,
-				      const char *str);
 
 size_t trailer_block_start(struct trailer_info *);
 size_t trailer_block_end(struct trailer_info *);
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* Re: [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup)
  2024-03-16  6:27 [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Linus Arver via GitGitGadget
                   ` (5 preceding siblings ...)
  2024-03-16  6:27 ` [PATCH 6/6] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
@ 2024-03-16 17:06 ` Junio C Hamano
  2024-03-26 22:00 ` Junio C Hamano
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
  8 siblings, 0 replies; 67+ messages in thread
From: Junio C Hamano @ 2024-03-16 17:06 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> NOTE: This series is based on the la/format-trailer-info topic branch (see
> its discussion at [1]).

Folks, a quick review of the base topic is highly appreciated.  Not
having much review to talk about in [1] makes it a bit premature to
build another series on top of it.

Thanks.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup)
  2024-03-16  6:27 [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Linus Arver via GitGitGadget
                   ` (6 preceding siblings ...)
  2024-03-16 17:06 ` [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Junio C Hamano
@ 2024-03-26 22:00 ` Junio C Hamano
  2024-04-19  5:36   ` Linus Arver
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
  8 siblings, 1 reply; 67+ messages in thread
From: Junio C Hamano @ 2024-03-26 22:00 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> NOTE: This series is based on the la/format-trailer-info topic branch (see
> its discussion at [1]).

This unfortunately depends on another series, which has seen no
reviews after 10 days X-<.  It did not help that this was sent
almost immediately after that unreviewed series that it depends on.

Any takers?  There must be some folks who know the trailer code very
well, no?

Thanks.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* [PATCH v2 0/8] Make trailer_info struct private (plus sequencer cleanup)
  2024-03-16  6:27 [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Linus Arver via GitGitGadget
                   ` (7 preceding siblings ...)
  2024-03-26 22:00 ` Junio C Hamano
@ 2024-04-19  5:22 ` Linus Arver via GitGitGadget
  2024-04-19  5:22   ` [PATCH v2 1/8] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
                     ` (9 more replies)
  8 siblings, 10 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-19  5:22 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

NOTE: This series is based on the la/format-trailer-info topic branch (see
its discussion at [1]).

This series is based on the initial series [2], notably the v4 version of
patches 17-20 as suggested by Christian [3]. This version addresses the
review comments for those patches, namely the splitting up of Patch 19 there
into 3 separate patches [4] (as Patches 05-07 here).

The central idea is to make the trailer_info struct private (that is, move
its definition from trailer.h to trailer.c) --- aka the "pimpl" idiom. See
the detailed commit message for Patch 07 for the motivation behind the
change.

Patch 04 makes sequencer.c a well-behaved trailer API consumer, by making
use of the trailer iterator. Patch 03 prepares us for Patch 04. Patch 08
slightly reduces the weight of the API by removing (from the API surface) an
unused function.


Notable changes in v2
=====================

 * Add unit tests at the beginning of the series (Patches 01 and 02) and use
   them to verify that the other edge cases remain unchanged when we add the
   "raw" member (Patch 03)

[1]
https://lore.kernel.org/git/pull.1694.git.1710485706.gitgitgadget@gmail.com/
[2]
https://lore.kernel.org/git/pull.1632.v4.git.1707196348.gitgitgadget@gmail.com/
[3]
https://lore.kernel.org/git/CAP8UFD08F0V13X0+CJ1uhMPzPWVMs2okGVMJch0DkQg5M3BWLA@mail.gmail.com/
[4]
https://lore.kernel.org/git/CAP8UFD1twELGKvvesxgCrZrypKZpgSt04ira3mvurG1UbpDfxQ@mail.gmail.com/

Linus Arver (8):
  Makefile: sort UNIT_TEST_PROGRAMS
  trailer: add unit tests for trailer iterator
  trailer: teach iterator about non-trailer lines
  sequencer: use the trailer iterator
  interpret-trailers: access trailer_info with new helpers
  trailer: make parse_trailers() return trailer_info pointer
  trailer: make trailer_info struct private
  trailer: retire trailer_info_get() from API

 Makefile                     |   5 +-
 builtin/interpret-trailers.c |  12 +--
 sequencer.c                  |  27 +++---
 t/unit-tests/t-trailer.c     | 181 +++++++++++++++++++++++++++++++++++
 trailer.c                    | 161 +++++++++++++++++++------------
 trailer.h                    |  46 ++++-----
 6 files changed, 321 insertions(+), 111 deletions(-)
 create mode 100644 t/unit-tests/t-trailer.c


base-commit: 3452d173241c8b87ecdd67f91f594cb14327e394
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1696%2Flistx%2Ftrailer-api-part-3-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1696/listx/trailer-api-part-3-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/1696

Range-diff vs v1:

 -:  ----------- > 1:  b6a1304f8ae Makefile: sort UNIT_TEST_PROGRAMS
 -:  ----------- > 2:  e1fa05143ac trailer: add unit tests for trailer iterator
 1:  32ad0397737 ! 3:  5520a98e296 trailer: teach iterator about non-trailer lines
     @@ Commit message
      
          Signed-off-by: Linus Arver <linusa@google.com>
      
     + ## t/unit-tests/t-trailer.c ##
     +@@ t/unit-tests/t-trailer.c: static void run_t_trailer_iterator(void)
     + 			"not a trailer line\n"
     + 			"not a trailer line\n"
     + 			"Signed-off-by: x\n",
     +-			1
     ++			/*
     ++			 * Even though there is only really 1 real "trailer"
     ++			 * (Signed-off-by), we still have 4 trailer objects
     ++			 * because we still want to iterate through the entire
     ++			 * block.
     ++			 */
     ++			4
     + 		},
     + 		{
     + 			"with non-trailer lines (one too many) in trailer block",
     +
       ## trailer.c ##
      @@ trailer.c: void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
       
 2:  dc873c3b820 = 4:  84897cf5c83 sequencer: use the trailer iterator
 3:  872e67286c8 = 5:  e961d49cd40 interpret-trailers: access trailer_info with new helpers
 4:  c55ae2cbda9 = 6:  093f68f3658 trailer: make parse_trailers() return trailer_info pointer
 5:  cf59dee5064 = 7:  0e9ae049b88 trailer: make trailer_info struct private
 6:  19de7c64171 = 8:  eca77a1a462 trailer: retire trailer_info_get() from API

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 67+ messages in thread

* [PATCH v2 1/8] Makefile: sort UNIT_TEST_PROGRAMS
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
@ 2024-04-19  5:22   ` Linus Arver via GitGitGadget
  2024-04-19  5:22   ` [PATCH v2 2/8] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
                     ` (8 subsequent siblings)
  9 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-19  5:22 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Signed-off-by: Linus Arver <linusa@google.com>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 4e255c81f22..d3a3f16f076 100644
--- a/Makefile
+++ b/Makefile
@@ -1343,10 +1343,10 @@ THIRD_PARTY_SOURCES += sha1collisiondetection/%
 THIRD_PARTY_SOURCES += sha1dc/%
 
 UNIT_TEST_PROGRAMS += t-basic
-UNIT_TEST_PROGRAMS += t-mem-pool
-UNIT_TEST_PROGRAMS += t-strbuf
 UNIT_TEST_PROGRAMS += t-ctype
+UNIT_TEST_PROGRAMS += t-mem-pool
 UNIT_TEST_PROGRAMS += t-prio-queue
+UNIT_TEST_PROGRAMS += t-strbuf
 UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v2 2/8] trailer: add unit tests for trailer iterator
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
  2024-04-19  5:22   ` [PATCH v2 1/8] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
@ 2024-04-19  5:22   ` Linus Arver via GitGitGadget
  2024-04-19  5:33     ` Linus Arver
                       ` (2 more replies)
  2024-04-19  5:22   ` [PATCH v2 3/8] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
                     ` (7 subsequent siblings)
  9 siblings, 3 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-19  5:22 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Test the number of trailers found by the iterator (to be more precise,
the parsing mechanism which the iterator just walks over) when given
some arbitrary log message.

We test the iterator because it is a public interface function exposed
by the trailer API (we generally don't want to test internal
implementation details which are, unlike the API, subject to drastic
changes).

Signed-off-by: Linus Arver <linusa@google.com>
---
 Makefile                 |   1 +
 t/unit-tests/t-trailer.c | 175 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+)
 create mode 100644 t/unit-tests/t-trailer.c

diff --git a/Makefile b/Makefile
index d3a3f16f076..5418ddd03be 100644
--- a/Makefile
+++ b/Makefile
@@ -1347,6 +1347,7 @@ UNIT_TEST_PROGRAMS += t-ctype
 UNIT_TEST_PROGRAMS += t-mem-pool
 UNIT_TEST_PROGRAMS += t-prio-queue
 UNIT_TEST_PROGRAMS += t-strbuf
+UNIT_TEST_PROGRAMS += t-trailer
 UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o
diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
new file mode 100644
index 00000000000..147a51b66b9
--- /dev/null
+++ b/t/unit-tests/t-trailer.c
@@ -0,0 +1,175 @@
+#include "test-lib.h"
+#include "trailer.h"
+
+static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
+{
+	struct trailer_iterator iter;
+	size_t i = 0;
+
+	trailer_iterator_init(&iter, msg);
+	while (trailer_iterator_advance(&iter)) {
+		i++;
+	}
+	trailer_iterator_release(&iter);
+
+	check_uint(i, ==, num_expected_trailers);
+}
+
+static void run_t_trailer_iterator(void)
+{
+	static struct test_cases {
+		const char *name;
+		const char *msg;
+		size_t num_expected_trailers;
+	} tc[] = {
+		{
+			"empty input",
+			"",
+			0
+		},
+		{
+			"no newline at beginning",
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n",
+			0
+		},
+		{
+			"newline at beginning",
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n",
+			3
+		},
+		{
+			"without body text",
+			"subject: foo bar\n"
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n",
+			3
+		},
+		{
+			"with body text, without divider",
+			"my subject\n"
+			"\n"
+			"my body which is long\n"
+			"and contains some special\n"
+			"chars like : = ? !\n"
+			"hello\n"
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n"
+			"Signed-off-by: x\n",
+			4
+		},
+		{
+			"with body text, without divider (second trailer block)",
+			"my subject\n"
+			"\n"
+			"my body which is long\n"
+			"and contains some special\n"
+			"chars like : = ? !\n"
+			"hello\n"
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n"
+			"Signed-off-by: x\n"
+			"\n"
+			/*
+			 * Because this is the last trailer block, it takes
+			 * precedence over the first one encountered above.
+			 */
+			"Helped-by: x\n"
+			"Signed-off-by: x\n",
+			2
+		},
+		{
+			"with body text, with divider",
+			"my subject\n"
+			"\n"
+			"my body which is long\n"
+			"and contains some special\n"
+			"chars like : = ? !\n"
+			"hello\n"
+			"\n"
+			"---\n"
+			"\n"
+			/*
+			 * This trailer still counts because the iterator
+			 * always ignores the divider.
+			 */
+			"Signed-off-by: x\n",
+			1
+		},
+		{
+			"with non-trailer lines in trailer block",
+			"subject: foo bar\n"
+			"\n"
+			/*
+			 * Even though this trailer block has a non-trailer line
+			 * in it, it's still a valid trailer block because it's
+			 * at least 25% trailers and is Git-generated.
+			 */
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"Signed-off-by: x\n",
+			1
+		},
+		{
+			"with non-trailer lines (one too many) in trailer block",
+			"subject: foo bar\n"
+			"\n"
+			/*
+			 * This block has only 20% trailers, so it's below the
+			 * 25% threshold.
+			 */
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"Signed-off-by: x\n",
+			0
+		},
+		{
+			"with non-trailer lines (only 1) in trailer block, but no Git-generated trailers",
+			"subject: foo bar\n"
+			"\n"
+			/*
+			 * This block has only 1 non-trailer out of 10 (IOW, 90%
+			 * trailers) but is not considered a trailer because the
+			 * 25% threshold only applies to cases where there was a
+			 * Git-generated trailer (see git_generated_prefixes[]
+			 * in trailer.c).
+			 */
+			"Reviewed-by: x\n"
+			"Reviewed-by: x\n"
+			"Reviewed-by: x\n"
+			"Helped-by: x\n"
+			"Helped-by: x\n"
+			"Helped-by: x\n"
+			"Acked-by: x\n"
+			"Acked-by: x\n"
+			"Acked-by: x\n"
+			"not a trailer line\n",
+			0
+		},
+	};
+
+	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
+		TEST(t_trailer_iterator(tc[i].msg,
+					tc[i].num_expected_trailers),
+		     "%s", tc[i].name);
+	}
+}
+
+int cmd_main(int argc, const char **argv)
+{
+	run_t_trailer_iterator();
+	return test_done();
+}
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v2 3/8] trailer: teach iterator about non-trailer lines
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
  2024-04-19  5:22   ` [PATCH v2 1/8] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
  2024-04-19  5:22   ` [PATCH v2 2/8] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
@ 2024-04-19  5:22   ` Linus Arver via GitGitGadget
  2024-04-19  5:22   ` [PATCH v2 4/8] sequencer: use the trailer iterator Linus Arver via GitGitGadget
                     ` (6 subsequent siblings)
  9 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-19  5:22 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Previously the iterator did not iterate over non-trailer lines. This was
somewhat unfortunate, because trailer blocks could have non-trailer
lines in them since 146245063e (trailer: allow non-trailers in trailer
block, 2016-10-21), which was before the iterator was created in
f0939a0eb1 (trailer: add interface for iterating over commit trailers,
2020-09-27).

So if trailer API users wanted to iterate over all lines in a trailer
block (including non-trailer lines), they could not use the iterator and
were forced to use the lower-level trailer_info struct directly (which
provides a raw string array that includes all lines in the trailer
block).

Change the iterator's behavior so that we also iterate over non-trailer
lines, instead of skipping over them. The new "raw" member of the
iterator allows API users to access previously inaccessible non-trailer
lines. Reword the variable "trailer" to just "line" because this
variable can now hold both trailer lines _and_ non-trailer lines.

The new "raw" member is important because anyone currently not using the
iterator is using trailer_info's raw string array directly to access
lines to check what the combined key + value looks like. If we didn't
provide a "raw" member here, iterator users would have to re-construct
the unparsed line by concatenating the key and value back together again
--- which places an undue burden on iterator users.

The next commit demonstrates the use of the iterator in sequencer.c as an
example of where "raw" will be useful, so that sequencer.c can start using
the iterator.

For the existing use of the iterator in builtin/shortlog.c, we don't
have to change the code there because that code does

    trailer_iterator_init(&iter, body);
    while (trailer_iterator_advance(&iter)) {
        const char *value = iter.val.buf;

        if (!string_list_has_string(&log->trailers, iter.key.buf))
            continue;

        ...

and the

        if (!string_list_has_string(&log->trailers, iter.key.buf))

condition already skips over non-trailer lines (iter.key.buf is empty
for non-trailer lines, making the comparison still work even with this
commit).

Signed-off-by: Linus Arver <linusa@google.com>
---
 t/unit-tests/t-trailer.c |  8 +++++++-
 trailer.c                | 12 +++++-------
 trailer.h                |  8 ++++++++
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
index 147a51b66b9..50c696b969b 100644
--- a/t/unit-tests/t-trailer.c
+++ b/t/unit-tests/t-trailer.c
@@ -119,7 +119,13 @@ static void run_t_trailer_iterator(void)
 			"not a trailer line\n"
 			"not a trailer line\n"
 			"Signed-off-by: x\n",
-			1
+			/*
+			 * Even though there is only really 1 real "trailer"
+			 * (Signed-off-by), we still have 4 trailer objects
+			 * because we still want to iterate through the entire
+			 * block.
+			 */
+			4
 		},
 		{
 			"with non-trailer lines (one too many) in trailer block",
diff --git a/trailer.c b/trailer.c
index 3e4dab9c065..4700c441442 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1146,17 +1146,15 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	while (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
-		int separator_pos = find_separator(trailer, separators);
-
-		if (separator_pos < 1)
-			continue; /* not a real trailer */
+	if (iter->internal.cur < iter->internal.info.trailer_nr) {
+		char *line = iter->internal.info.trailers[iter->internal.cur++];
+		int separator_pos = find_separator(line, separators);
 
+		iter->raw = line;
 		strbuf_reset(&iter->key);
 		strbuf_reset(&iter->val);
 		parse_trailer(&iter->key, &iter->val, NULL,
-			      trailer, separator_pos);
+			      line, separator_pos);
 		/* Always unfold values during iteration. */
 		unfold_value(&iter->val);
 		return 1;
diff --git a/trailer.h b/trailer.h
index 9f42aa75994..ebafa3657e4 100644
--- a/trailer.h
+++ b/trailer.h
@@ -125,6 +125,14 @@ void format_trailers_from_commit(const struct process_trailer_options *,
  *   trailer_iterator_release(&iter);
  */
 struct trailer_iterator {
+	/*
+	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
+	 * key/val pair as part of a trailer block. A trailer block can be
+	 * either 100% trailer lines, or mixed in with non-trailer lines (in
+	 * which case at least 25% must be trailer lines).
+	 */
+	const char *raw;
+
 	struct strbuf key;
 	struct strbuf val;
 
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v2 4/8] sequencer: use the trailer iterator
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
                     ` (2 preceding siblings ...)
  2024-04-19  5:22   ` [PATCH v2 3/8] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
@ 2024-04-19  5:22   ` Linus Arver via GitGitGadget
  2024-04-23 21:19     ` Junio C Hamano
  2024-04-19  5:22   ` [PATCH v2 5/8] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
                     ` (5 subsequent siblings)
  9 siblings, 1 reply; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-19  5:22 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Instead of calling "trailer_info_get()", which is a low-level function
in the trailers implementation (trailer.c), call
trailer_iterator_advance(), which was specifically designed for public
consumption in f0939a0eb1 (trailer: add interface for iterating over
commit trailers, 2020-09-27).

Avoiding "trailer_info_get()" means we don't have to worry about options
like "no_divider" (relevant for parsing trailers). We also don't have to
check for things like "info.trailer_start == info.trailer_end" to see
whether there were any trailers (instead we can just check to see
whether the iterator advanced at all).

Note how we have to use "iter.raw" in order to get the same behavior as
before when we iterated over the unparsed string array (char **trailers)
in trailer_info.

Signed-off-by: Linus Arver <linusa@google.com>
---
 sequencer.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/sequencer.c b/sequencer.c
index ea1441e6174..4c1f6c675e7 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -319,35 +319,32 @@ static const char *get_todo_path(const struct replay_opts *opts)
 static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob,
 	size_t ignore_footer)
 {
-	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
-	struct trailer_info info;
-	size_t i;
+	struct trailer_iterator iter;
+	size_t i = 0;
 	int found_sob = 0, found_sob_last = 0;
 	char saved_char;
 
-	opts.no_divider = 1;
-
 	if (ignore_footer) {
 		saved_char = sb->buf[sb->len - ignore_footer];
 		sb->buf[sb->len - ignore_footer] = '\0';
 	}
 
-	trailer_info_get(&opts, sb->buf, &info);
+	trailer_iterator_init(&iter, sb->buf);
 
 	if (ignore_footer)
 		sb->buf[sb->len - ignore_footer] = saved_char;
 
-	if (info.trailer_block_start == info.trailer_block_end)
-		return 0;
+	while (trailer_iterator_advance(&iter)) {
+		i++;
+		if (sob && !strncmp(iter.raw, sob->buf, sob->len))
+			found_sob = i;
+	}
+	trailer_iterator_release(&iter);
 
-	for (i = 0; i < info.trailer_nr; i++)
-		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
-			found_sob = 1;
-			if (i == info.trailer_nr - 1)
-				found_sob_last = 1;
-		}
+	if (!i)
+		return 0;
 
-	trailer_info_release(&info);
+	found_sob_last = (int)i == found_sob;
 
 	if (found_sob_last)
 		return 3;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v2 5/8] interpret-trailers: access trailer_info with new helpers
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
                     ` (3 preceding siblings ...)
  2024-04-19  5:22   ` [PATCH v2 4/8] sequencer: use the trailer iterator Linus Arver via GitGitGadget
@ 2024-04-19  5:22   ` Linus Arver via GitGitGadget
  2024-04-19  5:22   ` [PATCH v2 6/8] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
                     ` (4 subsequent siblings)
  9 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-19  5:22 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Instead of directly accessing trailer_info members, access them
indirectly through new helper functions exposed by the trailer API.

This is the first of two preparatory commits which will allow us to
use the so-called "pimpl" (pointer to implementation) idiom for the
trailer API, by making the trailer_info struct private to the trailer
implementation (and thus hidden from the API).

Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linusa@google.com>
---
 builtin/interpret-trailers.c | 12 ++++++------
 trailer.c                    | 21 +++++++++++++++++++++
 trailer.h                    |  4 ++++
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c
index 11f4ce9e4a2..f3240682e35 100644
--- a/builtin/interpret-trailers.c
+++ b/builtin/interpret-trailers.c
@@ -141,7 +141,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	LIST_HEAD(head);
 	struct strbuf sb = STRBUF_INIT;
 	struct strbuf trailer_block = STRBUF_INIT;
-	struct trailer_info info;
+	struct trailer_info *info = trailer_info_new();
 	FILE *outfile = stdout;
 
 	trailer_config_init();
@@ -151,13 +151,13 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	if (opts->in_place)
 		outfile = create_in_place_tempfile(file);
 
-	parse_trailers(opts, &info, sb.buf, &head);
+	parse_trailers(opts, info, sb.buf, &head);
 
 	/* Print the lines before the trailers */
 	if (!opts->only_trailers)
-		fwrite(sb.buf, 1, info.trailer_block_start, outfile);
+		fwrite(sb.buf, 1, trailer_block_start(info), outfile);
 
-	if (!opts->only_trailers && !info.blank_line_before_trailer)
+	if (!opts->only_trailers && !blank_line_before_trailer_block(info))
 		fprintf(outfile, "\n");
 
 
@@ -178,8 +178,8 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 
 	/* Print the lines after the trailers as is */
 	if (!opts->only_trailers)
-		fwrite(sb.buf + info.trailer_block_end, 1, sb.len - info.trailer_block_end, outfile);
-	trailer_info_release(&info);
+		fwrite(sb.buf + trailer_block_end(info), 1, sb.len - trailer_block_end(info), outfile);
+	trailer_info_release(info);
 
 	if (opts->in_place)
 		if (rename_tempfile(&trailers_tempfile, file))
diff --git a/trailer.c b/trailer.c
index 4700c441442..95b4c9b8f19 100644
--- a/trailer.c
+++ b/trailer.c
@@ -952,6 +952,12 @@ static void unfold_value(struct strbuf *val)
 	strbuf_release(&out);
 }
 
+struct trailer_info *trailer_info_new(void)
+{
+	struct trailer_info *info = xcalloc(1, sizeof(*info));
+	return info;
+}
+
 /*
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
@@ -1000,6 +1006,21 @@ void free_trailers(struct list_head *trailers)
 	}
 }
 
+size_t trailer_block_start(struct trailer_info *info)
+{
+	return info->trailer_block_start;
+}
+
+size_t trailer_block_end(struct trailer_info *info)
+{
+	return info->trailer_block_end;
+}
+
+int blank_line_before_trailer_block(struct trailer_info *info)
+{
+	return info->blank_line_before_trailer;
+}
+
 void trailer_info_get(const struct process_trailer_options *opts,
 		      const char *str,
 		      struct trailer_info *info)
diff --git a/trailer.h b/trailer.h
index ebafa3657e4..9ac4be853c5 100644
--- a/trailer.h
+++ b/trailer.h
@@ -97,6 +97,10 @@ void parse_trailers(const struct process_trailer_options *,
 void trailer_info_get(const struct process_trailer_options *,
 		      const char *str,
 		      struct trailer_info *);
+size_t trailer_block_start(struct trailer_info *);
+size_t trailer_block_end(struct trailer_info *);
+int blank_line_before_trailer_block(struct trailer_info *);
+struct trailer_info *trailer_info_new(void);
 
 void trailer_info_release(struct trailer_info *info);
 
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v2 6/8] trailer: make parse_trailers() return trailer_info pointer
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
                     ` (4 preceding siblings ...)
  2024-04-19  5:22   ` [PATCH v2 5/8] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
@ 2024-04-19  5:22   ` Linus Arver via GitGitGadget
  2024-04-23 23:17     ` Junio C Hamano
  2024-04-19  5:22   ` [PATCH v2 7/8] trailer: make trailer_info struct private Linus Arver via GitGitGadget
                     ` (3 subsequent siblings)
  9 siblings, 1 reply; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-19  5:22 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

This is the second and final preparatory commit for making the
trailer_info struct private to the trailer implementation.

Make trailer_info_get() do the actual work of allocating a new
trailer_info struct, and return a pointer to it. Because
parse_trailers() wraps around trailer_info_get(), it too can return this
pointer to the caller. From the trailer API user's perspective, the call
to trailer_info_new() can be replaced with parse_trailers(); do so in
interpret-trailers.

Because trailer_info_new() is no longer called by interpret-trailers,
remove this function from the trailer API.

With this change, we no longer allocate trailer_info on the stack ---
all uses of it are via a pointer where the actual data is always
allocated at runtime through trailer_info_new(). Make
trailer_info_release() free this dynamically allocated memory.

Finally, due to the way the function signatures of parse_trailers() and
trailer_info_get() have changed, update the callsites in
format_trailers_from_commit() and trailer_iterator_init() accordingly.

Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linusa@google.com>
---
 builtin/interpret-trailers.c |  4 ++--
 trailer.c                    | 41 +++++++++++++++++++-----------------
 trailer.h                    | 17 ++++++---------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c
index f3240682e35..6bf8cec005a 100644
--- a/builtin/interpret-trailers.c
+++ b/builtin/interpret-trailers.c
@@ -141,7 +141,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	LIST_HEAD(head);
 	struct strbuf sb = STRBUF_INIT;
 	struct strbuf trailer_block = STRBUF_INIT;
-	struct trailer_info *info = trailer_info_new();
+	struct trailer_info *info;
 	FILE *outfile = stdout;
 
 	trailer_config_init();
@@ -151,7 +151,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	if (opts->in_place)
 		outfile = create_in_place_tempfile(file);
 
-	parse_trailers(opts, info, sb.buf, &head);
+	info = parse_trailers(opts, sb.buf, &head);
 
 	/* Print the lines before the trailers */
 	if (!opts->only_trailers)
diff --git a/trailer.c b/trailer.c
index 95b4c9b8f19..9179dd802c6 100644
--- a/trailer.c
+++ b/trailer.c
@@ -952,7 +952,7 @@ static void unfold_value(struct strbuf *val)
 	strbuf_release(&out);
 }
 
-struct trailer_info *trailer_info_new(void)
+static struct trailer_info *trailer_info_new(void)
 {
 	struct trailer_info *info = xcalloc(1, sizeof(*info));
 	return info;
@@ -962,16 +962,16 @@ struct trailer_info *trailer_info_new(void)
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
  */
-void parse_trailers(const struct process_trailer_options *opts,
-		    struct trailer_info *info,
-		    const char *str,
-		    struct list_head *head)
+struct trailer_info *parse_trailers(const struct process_trailer_options *opts,
+				    const char *str,
+				    struct list_head *head)
 {
+	struct trailer_info *info;
 	struct strbuf tok = STRBUF_INIT;
 	struct strbuf val = STRBUF_INIT;
 	size_t i;
 
-	trailer_info_get(opts, str, info);
+	info = trailer_info_get(opts, str);
 
 	for (i = 0; i < info->trailer_nr; i++) {
 		int separator_pos;
@@ -995,6 +995,8 @@ void parse_trailers(const struct process_trailer_options *opts,
 					 strbuf_detach(&val, NULL));
 		}
 	}
+
+	return info;
 }
 
 void free_trailers(struct list_head *trailers)
@@ -1021,10 +1023,10 @@ int blank_line_before_trailer_block(struct trailer_info *info)
 	return info->blank_line_before_trailer;
 }
 
-void trailer_info_get(const struct process_trailer_options *opts,
-		      const char *str,
-		      struct trailer_info *info)
+struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
+				      const char *str)
 {
+	struct trailer_info *info = trailer_info_new();
 	size_t end_of_log_message = 0, trailer_block_start = 0;
 	struct strbuf **trailer_lines, **ptr;
 	char **trailer_strings = NULL;
@@ -1063,6 +1065,8 @@ void trailer_info_get(const struct process_trailer_options *opts,
 	info->trailer_block_end = end_of_log_message;
 	info->trailers = trailer_strings;
 	info->trailer_nr = nr;
+
+	return info;
 }
 
 void trailer_info_release(struct trailer_info *info)
@@ -1071,6 +1075,7 @@ void trailer_info_release(struct trailer_info *info)
 	for (i = 0; i < info->trailer_nr; i++)
 		free(info->trailers[i]);
 	free(info->trailers);
+	free(info);
 }
 
 void format_trailers(const struct process_trailer_options *opts,
@@ -1138,21 +1143,19 @@ void format_trailers_from_commit(const struct process_trailer_options *opts,
 				 struct strbuf *out)
 {
 	LIST_HEAD(trailer_objects);
-	struct trailer_info info;
-
-	parse_trailers(opts, &info, msg, &trailer_objects);
+	struct trailer_info *info = parse_trailers(opts, msg, &trailer_objects);
 
 	/* If we want the whole block untouched, we can take the fast path. */
 	if (!opts->only_trailers && !opts->unfold && !opts->filter &&
 	    !opts->separator && !opts->key_only && !opts->value_only &&
 	    !opts->key_value_separator) {
-		strbuf_add(out, msg + info.trailer_block_start,
-			   info.trailer_block_end - info.trailer_block_start);
+		strbuf_add(out, msg + info->trailer_block_start,
+			   info->trailer_block_end - info->trailer_block_start);
 	} else
 		format_trailers(opts, &trailer_objects, out);
 
 	free_trailers(&trailer_objects);
-	trailer_info_release(&info);
+	trailer_info_release(info);
 }
 
 void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
@@ -1161,14 +1164,14 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 	strbuf_init(&iter->key, 0);
 	strbuf_init(&iter->val, 0);
 	opts.no_divider = 1;
-	trailer_info_get(&opts, msg, &iter->internal.info);
+	iter->internal.info = trailer_info_get(&opts, msg);
 	iter->internal.cur = 0;
 }
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	if (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *line = iter->internal.info.trailers[iter->internal.cur++];
+	if (iter->internal.cur < iter->internal.info->trailer_nr) {
+		char *line = iter->internal.info->trailers[iter->internal.cur++];
 		int separator_pos = find_separator(line, separators);
 
 		iter->raw = line;
@@ -1185,7 +1188,7 @@ int trailer_iterator_advance(struct trailer_iterator *iter)
 
 void trailer_iterator_release(struct trailer_iterator *iter)
 {
-	trailer_info_release(&iter->internal.info);
+	trailer_info_release(iter->internal.info);
 	strbuf_release(&iter->val);
 	strbuf_release(&iter->key);
 }
diff --git a/trailer.h b/trailer.h
index 9ac4be853c5..b32213a9e23 100644
--- a/trailer.h
+++ b/trailer.h
@@ -89,18 +89,15 @@ void parse_trailers_from_command_line_args(struct list_head *arg_head,
 void process_trailers_lists(struct list_head *head,
 			    struct list_head *arg_head);
 
-void parse_trailers(const struct process_trailer_options *,
-		    struct trailer_info *,
-		    const char *str,
-		    struct list_head *head);
-
-void trailer_info_get(const struct process_trailer_options *,
-		      const char *str,
-		      struct trailer_info *);
+struct trailer_info *parse_trailers(const struct process_trailer_options *,
+				    const char *str,
+				    struct list_head *head);
+struct trailer_info *trailer_info_get(const struct process_trailer_options *,
+				      const char *str);
+
 size_t trailer_block_start(struct trailer_info *);
 size_t trailer_block_end(struct trailer_info *);
 int blank_line_before_trailer_block(struct trailer_info *);
-struct trailer_info *trailer_info_new(void);
 
 void trailer_info_release(struct trailer_info *info);
 
@@ -142,7 +139,7 @@ struct trailer_iterator {
 
 	/* private */
 	struct {
-		struct trailer_info info;
+		struct trailer_info *info;
 		size_t cur;
 	} internal;
 };
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v2 7/8] trailer: make trailer_info struct private
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
                     ` (5 preceding siblings ...)
  2024-04-19  5:22   ` [PATCH v2 6/8] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
@ 2024-04-19  5:22   ` Linus Arver via GitGitGadget
  2024-04-23 23:27     ` Junio C Hamano
  2024-04-19  5:22   ` [PATCH v2 8/8] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
                     ` (2 subsequent siblings)
  9 siblings, 1 reply; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-19  5:22 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

In 13211ae23f (trailer: separate public from internal portion of
trailer_iterator, 2023-09-09) we moved trailer_info behind an anonymous
struct to discourage use by trailer.h API users. However it still left
open the possibility of external use of trailer_info itself. Now that
there are no external users of trailer_info, we can make this struct
private.

Make this struct private by putting its definition inside trailer.c.
This has two benefits:

  (1) it makes the surface area of the public facing
      interface (trailer.h) smaller, and

  (2) external API users are unable to peer inside this struct (because
      it is only ever exposed as an opaque pointer).

There are a couple of disadvantages:

  (A) every time the member of the struct is accessed an extra pointer
      dereference must be done, and

  (B) for users of trailer_info outside trailer.c, this struct can no
      longer be allocated on the stack and may only be allocated on the
      heap (because its definition is hidden away in trailer.c) and
      appropriately deallocated by the user.

(The disadvantages have already been observed in the two preparatory
commits that precede this one.) This commit believes that the benefits
outweigh the disadvantages for designing APIs, as explained below.

Making trailer_info private exposes existing deficiencies in the API.
This is because users of this struct had full access to its internals,
so there wasn't much need to actually design it to be "complete" in the
sense that API users only needed to use what was provided by the API.
For example, the location of the trailer block (start/end offsets
relative to the start of the input text) was accessible by looking at
these struct members directly. Now that the struct is private, we have
to expose new API functions to allow clients to access this
information (see builtin/interpret-trailers.c).

The idea in this commit to hide implementation details behind an "opaque
pointer" is also known as the "pimpl" (pointer to implementation) idiom
in C++ and is a common pattern in that language (where, for example,
abstract classes only have pointers to concrete classes).

However, the original inspiration to use this idiom does not come from
C++, but instead from the book "C Interfaces and Implementations:
Techniques for Creating Reusable Software" [1]. This book recommends opaque
pointers as a good design principle for designing C libraries, using the
term "interface" for the functions defined in *.h (header) files and
"implementation" for the corresponding *.c files which define the
interfaces.

The book says this about opaque pointers:

    ... clients can manipulate such pointers freely, but they can’t
    dereference them; that is, they can’t look at the innards of the
    structure pointed to by them. Only the implementation has that
    privilege. Opaque pointers hide representation details and help
    catch errors.

In our case, "struct trailer_info" is now hidden from clients, and the
ways in which this opaque pointer can be used are limited to the richness
of <trailer.h>. In other words, <trailer.h> exclusively controls exactly
how "trailer_info" pointers are to be used.

[1] Hanson, David R. "C Interfaces and Implementations: Techniques for
    Creating Reusable Software". Addison Wesley, 1997. p. 22

Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linusa@google.com>
---
 trailer.c | 21 +++++++++++++++++++++
 trailer.h | 23 ++---------------------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/trailer.c b/trailer.c
index 9179dd802c6..6167b707ae0 100644
--- a/trailer.c
+++ b/trailer.c
@@ -11,6 +11,27 @@
  * Copyright (c) 2013, 2014 Christian Couder <chriscool@tuxfamily.org>
  */
 
+struct trailer_info {
+	/*
+	 * True if there is a blank line before the location pointed to by
+	 * trailer_block_start.
+	 */
+	int blank_line_before_trailer;
+
+	/*
+	 * Offsets to the trailer block start and end positions in the input
+	 * string. If no trailer block is found, these are both set to the
+	 * "true" end of the input (find_end_of_log_message()).
+	 */
+	size_t trailer_block_start, trailer_block_end;
+
+	/*
+	 * Array of trailers found.
+	 */
+	char **trailers;
+	size_t trailer_nr;
+};
+
 struct conf_info {
 	char *name;
 	char *key;
diff --git a/trailer.h b/trailer.h
index b32213a9e23..a63e97a2663 100644
--- a/trailer.h
+++ b/trailer.h
@@ -4,6 +4,8 @@
 #include "list.h"
 #include "strbuf.h"
 
+struct trailer_info;
+
 enum trailer_where {
 	WHERE_DEFAULT,
 	WHERE_END,
@@ -29,27 +31,6 @@ int trailer_set_where(enum trailer_where *item, const char *value);
 int trailer_set_if_exists(enum trailer_if_exists *item, const char *value);
 int trailer_set_if_missing(enum trailer_if_missing *item, const char *value);
 
-struct trailer_info {
-	/*
-	 * True if there is a blank line before the location pointed to by
-	 * trailer_block_start.
-	 */
-	int blank_line_before_trailer;
-
-	/*
-	 * Offsets to the trailer block start and end positions in the input
-	 * string. If no trailer block is found, these are both set to the
-	 * "true" end of the input (find_end_of_log_message()).
-	 */
-	size_t trailer_block_start, trailer_block_end;
-
-	/*
-	 * Array of trailers found.
-	 */
-	char **trailers;
-	size_t trailer_nr;
-};
-
 /*
  * A list that represents newly-added trailers, such as those provided
  * with the --trailer command line option of git-interpret-trailers.
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v2 8/8] trailer: retire trailer_info_get() from API
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
                     ` (6 preceding siblings ...)
  2024-04-19  5:22   ` [PATCH v2 7/8] trailer: make trailer_info struct private Linus Arver via GitGitGadget
@ 2024-04-19  5:22   ` Linus Arver via GitGitGadget
  2024-04-23 23:27     ` Junio C Hamano
  2024-04-24  0:27   ` [PATCH v2 0/8] Make trailer_info struct private (plus sequencer cleanup) Junio C Hamano
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
  9 siblings, 1 reply; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-19  5:22 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Make trailer_info_get() "static" to be file-scoped to trailer.c, because
no one outside of trailer.c uses it. Remove its declaration from
<trailer.h>.

We have to also reposition it to be above parse_trailers(), which
depends on it.

Signed-off-by: Linus Arver <linusa@google.com>
---
 trailer.c | 92 +++++++++++++++++++++++++++----------------------------
 trailer.h |  2 --
 2 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/trailer.c b/trailer.c
index 6167b707ae0..33b6aa7e8bd 100644
--- a/trailer.c
+++ b/trailer.c
@@ -979,6 +979,52 @@ static struct trailer_info *trailer_info_new(void)
 	return info;
 }
 
+static struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
+					     const char *str)
+{
+	struct trailer_info *info = trailer_info_new();
+	size_t end_of_log_message = 0, trailer_block_start = 0;
+	struct strbuf **trailer_lines, **ptr;
+	char **trailer_strings = NULL;
+	size_t nr = 0, alloc = 0;
+	char **last = NULL;
+
+	trailer_config_init();
+
+	end_of_log_message = find_end_of_log_message(str, opts->no_divider);
+	trailer_block_start = find_trailer_block_start(str, end_of_log_message);
+
+	trailer_lines = strbuf_split_buf(str + trailer_block_start,
+					 end_of_log_message - trailer_block_start,
+					 '\n',
+					 0);
+	for (ptr = trailer_lines; *ptr; ptr++) {
+		if (last && isspace((*ptr)->buf[0])) {
+			struct strbuf sb = STRBUF_INIT;
+			strbuf_attach(&sb, *last, strlen(*last), strlen(*last));
+			strbuf_addbuf(&sb, *ptr);
+			*last = strbuf_detach(&sb, NULL);
+			continue;
+		}
+		ALLOC_GROW(trailer_strings, nr + 1, alloc);
+		trailer_strings[nr] = strbuf_detach(*ptr, NULL);
+		last = find_separator(trailer_strings[nr], separators) >= 1
+			? &trailer_strings[nr]
+			: NULL;
+		nr++;
+	}
+	strbuf_list_free(trailer_lines);
+
+	info->blank_line_before_trailer = ends_with_blank_line(str,
+							       trailer_block_start);
+	info->trailer_block_start = trailer_block_start;
+	info->trailer_block_end = end_of_log_message;
+	info->trailers = trailer_strings;
+	info->trailer_nr = nr;
+
+	return info;
+}
+
 /*
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
@@ -1044,52 +1090,6 @@ int blank_line_before_trailer_block(struct trailer_info *info)
 	return info->blank_line_before_trailer;
 }
 
-struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
-				      const char *str)
-{
-	struct trailer_info *info = trailer_info_new();
-	size_t end_of_log_message = 0, trailer_block_start = 0;
-	struct strbuf **trailer_lines, **ptr;
-	char **trailer_strings = NULL;
-	size_t nr = 0, alloc = 0;
-	char **last = NULL;
-
-	trailer_config_init();
-
-	end_of_log_message = find_end_of_log_message(str, opts->no_divider);
-	trailer_block_start = find_trailer_block_start(str, end_of_log_message);
-
-	trailer_lines = strbuf_split_buf(str + trailer_block_start,
-					 end_of_log_message - trailer_block_start,
-					 '\n',
-					 0);
-	for (ptr = trailer_lines; *ptr; ptr++) {
-		if (last && isspace((*ptr)->buf[0])) {
-			struct strbuf sb = STRBUF_INIT;
-			strbuf_attach(&sb, *last, strlen(*last), strlen(*last));
-			strbuf_addbuf(&sb, *ptr);
-			*last = strbuf_detach(&sb, NULL);
-			continue;
-		}
-		ALLOC_GROW(trailer_strings, nr + 1, alloc);
-		trailer_strings[nr] = strbuf_detach(*ptr, NULL);
-		last = find_separator(trailer_strings[nr], separators) >= 1
-			? &trailer_strings[nr]
-			: NULL;
-		nr++;
-	}
-	strbuf_list_free(trailer_lines);
-
-	info->blank_line_before_trailer = ends_with_blank_line(str,
-							       trailer_block_start);
-	info->trailer_block_start = trailer_block_start;
-	info->trailer_block_end = end_of_log_message;
-	info->trailers = trailer_strings;
-	info->trailer_nr = nr;
-
-	return info;
-}
-
 void trailer_info_release(struct trailer_info *info)
 {
 	size_t i;
diff --git a/trailer.h b/trailer.h
index a63e97a2663..1b7422fa2b0 100644
--- a/trailer.h
+++ b/trailer.h
@@ -73,8 +73,6 @@ void process_trailers_lists(struct list_head *head,
 struct trailer_info *parse_trailers(const struct process_trailer_options *,
 				    const char *str,
 				    struct list_head *head);
-struct trailer_info *trailer_info_get(const struct process_trailer_options *,
-				      const char *str);
 
 size_t trailer_block_start(struct trailer_info *);
 size_t trailer_block_end(struct trailer_info *);
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* Re: [PATCH v2 2/8] trailer: add unit tests for trailer iterator
  2024-04-19  5:22   ` [PATCH v2 2/8] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
@ 2024-04-19  5:33     ` Linus Arver
  2024-04-19 18:46     ` Linus Arver
  2024-04-19 21:52     ` Junio C Hamano
  2 siblings, 0 replies; 67+ messages in thread
From: Linus Arver @ 2024-04-19  5:33 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget, git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Linus Arver <linusa@google.com>
> [...]
> +		{
> +			"with non-trailer lines in trailer block",
> +			"subject: foo bar\n"
> +			"\n"
> +			/*
> +			 * Even though this trailer block has a non-trailer line
> +			 * in it, it's still a valid trailer block because it's
> +			 * at least 25% trailers and is Git-generated.
> +			 */
 
In the next reroll (sometime next week?), I should put the

    ... (see git_generated_prefixes[] in trailer.c).

comment up here (where we first mention "Git-generated" trailers)
instead of down in the last test case below.

> +			"not a trailer line\n"
> +			"not a trailer line\n"
> +			"not a trailer line\n"
> +			"Signed-off-by: x\n",
> +			1
> +		},
> +		{
> +			"with non-trailer lines (one too many) in trailer block",
> +			"subject: foo bar\n"
> +			"\n"
> +			/*
> +			 * This block has only 20% trailers, so it's below the
> +			 * 25% threshold.
> +			 */
> +			"not a trailer line\n"
> +			"not a trailer line\n"
> +			"not a trailer line\n"
> +			"not a trailer line\n"
> +			"Signed-off-by: x\n",
> +			0
> +		},
> +		{
> +			"with non-trailer lines (only 1) in trailer block, but no Git-generated trailers",
> +			"subject: foo bar\n"
> +			"\n"
> +			/*
> +			 * This block has only 1 non-trailer out of 10 (IOW, 90%
> +			 * trailers) but is not considered a trailer because the
> +			 * 25% threshold only applies to cases where there was a
> +			 * Git-generated trailer (see git_generated_prefixes[]
> +			 * in trailer.c).
> +			 */
> +			"Reviewed-by: x\n"
> +			"Reviewed-by: x\n"
> +			"Reviewed-by: x\n"
> +			"Helped-by: x\n"
> +			"Helped-by: x\n"
> +			"Helped-by: x\n"
> +			"Acked-by: x\n"
> +			"Acked-by: x\n"
> +			"Acked-by: x\n"
> +			"not a trailer line\n",
> +			0
> +		},
> +	};

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup)
  2024-03-26 22:00 ` Junio C Hamano
@ 2024-04-19  5:36   ` Linus Arver
  0 siblings, 0 replies; 67+ messages in thread
From: Linus Arver @ 2024-04-19  5:36 UTC (permalink / raw)
  To: Junio C Hamano, Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk

Junio C Hamano <gitster@pobox.com> writes:

> "Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> NOTE: This series is based on the la/format-trailer-info topic branch (see
>> its discussion at [1]).
>
> This unfortunately depends on another series, which has seen no
> reviews after 10 days X-<.  It did not help that this was sent
> almost immediately after that unreviewed series that it depends on.
>
> Any takers?  There must be some folks who know the trailer code very
> well, no?
>

I've added some unit test cases in v2 Patch 02 to make this series a bit
more appealing for reviewers. Cheers.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v2 2/8] trailer: add unit tests for trailer iterator
  2024-04-19  5:22   ` [PATCH v2 2/8] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
  2024-04-19  5:33     ` Linus Arver
@ 2024-04-19 18:46     ` Linus Arver
  2024-04-19 21:52     ` Junio C Hamano
  2 siblings, 0 replies; 67+ messages in thread
From: Linus Arver @ 2024-04-19 18:46 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget, git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Linus Arver <linusa@google.com>
> 
> [...]
> 
> +		{
> +			"with non-trailer lines (only 1) in trailer block, but no Git-generated trailers",
> +			"subject: foo bar\n"
> +			"\n"
> +			/*
> +			 * This block has only 1 non-trailer out of 10 (IOW, 90%
> +			 * trailers) but is not considered a trailer because the

s/a trailer/a trailer block

> +			 * 25% threshold only applies to cases where there was a
> +			 * Git-generated trailer (see git_generated_prefixes[]
> +			 * in trailer.c).
> +			 */
> +			"Reviewed-by: x\n"
> +			"Reviewed-by: x\n"
> +			"Reviewed-by: x\n"
> +			"Helped-by: x\n"
> +			"Helped-by: x\n"
> +			"Helped-by: x\n"
> +			"Acked-by: x\n"
> +			"Acked-by: x\n"
> +			"Acked-by: x\n"
> +			"not a trailer line\n",
> +			0
> +		},

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v2 2/8] trailer: add unit tests for trailer iterator
  2024-04-19  5:22   ` [PATCH v2 2/8] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
  2024-04-19  5:33     ` Linus Arver
  2024-04-19 18:46     ` Linus Arver
@ 2024-04-19 21:52     ` Junio C Hamano
  2024-04-20  0:14       ` Linus Arver
  2 siblings, 1 reply; 67+ messages in thread
From: Junio C Hamano @ 2024-04-19 21:52 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> +UNIT_TEST_PROGRAMS += t-trailer
>  UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS))
>  UNIT_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS))
>  UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o

Totally offtopic, but does it bother folks who are interested in
adding more unit tests that they do not seem to interact very well
with GIT_SKIP_TESTS environment variable?

> diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
> new file mode 100644
> index 00000000000..147a51b66b9
> --- /dev/null
> +++ b/t/unit-tests/t-trailer.c
> @@ -0,0 +1,175 @@
> +#include "test-lib.h"
> +#include "trailer.h"
> +
> +static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
> +{
> +	struct trailer_iterator iter;
> +	size_t i = 0;
> +
> +	trailer_iterator_init(&iter, msg);
> +	while (trailer_iterator_advance(&iter)) {
> +		i++;
> +	}

Unnecessary {braces} around a single-statement block?

> +	trailer_iterator_release(&iter);
> +
> +	check_uint(i, ==, num_expected_trailers);
> +}
> +
> +static void run_t_trailer_iterator(void)
> +{
> +	static struct test_cases {
> +		const char *name;
> +		const char *msg;
> +		size_t num_expected_trailers;

This is more like number of lines in the trailer block, not
limiting its count only to true trailers, no?

> +	} tc[] = {
> ...
> +		{
> +			"with non-trailer lines in trailer block",
> +			"subject: foo bar\n"
> +			"\n"
> +			/*
> +			 * Even though this trailer block has a non-trailer line
> +			 * in it, it's still a valid trailer block because it's
> +			 * at least 25% trailers and is Git-generated.
> +			 */
> +			"not a trailer line\n"
> +			"not a trailer line\n"
> +			"not a trailer line\n"
> +			"Signed-off-by: x\n",
> +			1
> +		},

It is OK to leave it num_expected_trailers in this step and then
rename it when you update this "1" (number of real trailer lines)
to "4" (number of lines in the trailer block).

I wonder if you'd want to make more data available to the test.  At
least it would be more useful if the number of true trailer lines
and the number of lines in the trailer block are available
separately.

The interface into the trailers that is being tested by this code is
"the caller repeatedly calls the iterator, and the caller can
inspect the iterator's state available as its .raw, .key and .val
members and use them as it sees fit", so you could check, if you
wanted to, the following given the above sample data:

 * the first iteration finds no key/value pair (optionally, the
   contents found in the .raw member is as expected).
 * the second iteration finds no key/value pair (ditto).
 * the third iteration finds no key/value pair (ditto).
 * the fourth iteration finds key="Signed-off-by" value="x".
 * there is no fifth iteration.

but the current code only checks the last condition and nothing
else.  I dunno.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v2 2/8] trailer: add unit tests for trailer iterator
  2024-04-19 21:52     ` Junio C Hamano
@ 2024-04-20  0:14       ` Linus Arver
  0 siblings, 0 replies; 67+ messages in thread
From: Linus Arver @ 2024-04-20  0:14 UTC (permalink / raw)
  To: Junio C Hamano, Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk

Junio C Hamano <gitster@pobox.com> writes:

> "Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> +UNIT_TEST_PROGRAMS += t-trailer
>>  UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS))
>>  UNIT_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS))
>>  UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o
>
> Totally offtopic, but does it bother folks who are interested in
> adding more unit tests that they do not seem to interact very well
> with GIT_SKIP_TESTS environment variable?

FWIW I am not bothered (not that I've actually used GIT_SKIP_TESTS)
mainly because the unit tests finish so quickly.

>
>> diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
>> new file mode 100644
>> index 00000000000..147a51b66b9
>> --- /dev/null
>> +++ b/t/unit-tests/t-trailer.c
>> @@ -0,0 +1,175 @@
>> +#include "test-lib.h"
>> +#include "trailer.h"
>> +
>> +static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
>> +{
>> +	struct trailer_iterator iter;
>> +	size_t i = 0;
>> +
>> +	trailer_iterator_init(&iter, msg);
>> +	while (trailer_iterator_advance(&iter)) {
>> +		i++;
>> +	}
>
> Unnecessary {braces} around a single-statement block?

Gah, I blame writing too much Go. Will fix.

I also wonder if there's a C linter that could catch this... I am not
very familiar with C tooling. Would be great to run that in CI (GGG).

>> +	trailer_iterator_release(&iter);
>> +
>> +	check_uint(i, ==, num_expected_trailers);
>> +}
>> +
>> +static void run_t_trailer_iterator(void)
>> +{
>> +	static struct test_cases {
>> +		const char *name;
>> +		const char *msg;
>> +		size_t num_expected_trailers;
>
> This is more like number of lines in the trailer block, not
> limiting its count only to true trailers, no?

Yes, but to be even more precise, it would be the number of trailer
objects in the trailer block (a single trailer could be folded over
multiple lines). Will update to "num_expected_objects".

>
>> +	} tc[] = {
>> ...
>> +		{
>> +			"with non-trailer lines in trailer block",
>> +			"subject: foo bar\n"
>> +			"\n"
>> +			/*
>> +			 * Even though this trailer block has a non-trailer line
>> +			 * in it, it's still a valid trailer block because it's
>> +			 * at least 25% trailers and is Git-generated.
>> +			 */
>> +			"not a trailer line\n"
>> +			"not a trailer line\n"
>> +			"not a trailer line\n"
>> +			"Signed-off-by: x\n",
>> +			1
>> +		},
>
> It is OK to leave it num_expected_trailers in this step and then
> rename it when you update this "1" (number of real trailer lines)
> to "4" (number of lines in the trailer block).
>
> I wonder if you'd want to make more data available to the test.  At
> least it would be more useful if the number of true trailer lines
> and the number of lines in the trailer block are available
> separately.

I purposely did the simplest test possible in order to keep the patch
simple. Totally OK with expanding the data available to the test though,
if you'd prefer that (although that could also be in a separate series
later when we start converting some of the existing shell tests to these
unit tests).

> The interface into the trailers that is being tested by this code is
> "the caller repeatedly calls the iterator, and the caller can
> inspect the iterator's state available as its .raw, .key and .val
> members and use them as it sees fit", so you could check, if you
> wanted to, the following given the above sample data:
>
>  * the first iteration finds no key/value pair (optionally, the
>    contents found in the .raw member is as expected).
>  * the second iteration finds no key/value pair (ditto).
>  * the third iteration finds no key/value pair (ditto).
>  * the fourth iteration finds key="Signed-off-by" value="x".
>  * there is no fifth iteration.
>
> but the current code only checks the last condition and nothing
> else.  I dunno.

Yeah, this sounds like the natural thing to do. Basically have an exact
list of "this is the linked list of trailer objects I expect to see
after parsing is complete".

I do plan on making the trailer iterator struct private in a future
series though, so maybe it's best to do the above after that series (to
avoid churn)? IDK.

@Christian thoughts?

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v2 4/8] sequencer: use the trailer iterator
  2024-04-19  5:22   ` [PATCH v2 4/8] sequencer: use the trailer iterator Linus Arver via GitGitGadget
@ 2024-04-23 21:19     ` Junio C Hamano
  0 siblings, 0 replies; 67+ messages in thread
From: Junio C Hamano @ 2024-04-23 21:19 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Linus Arver <linusa@google.com>
>
> Instead of calling "trailer_info_get()", which is a low-level function
> in the trailers implementation (trailer.c), call
> trailer_iterator_advance(), which was specifically designed for public
> consumption in f0939a0eb1 (trailer: add interface for iterating over
> commit trailers, 2020-09-27).
>
> Avoiding "trailer_info_get()" means we don't have to worry about options
> like "no_divider" (relevant for parsing trailers). We also don't have to
> check for things like "info.trailer_start == info.trailer_end" to see
> whether there were any trailers (instead we can just check to see
> whether the iterator advanced at all).
>
> Note how we have to use "iter.raw" in order to get the same behavior as
> before when we iterated over the unparsed string array (char **trailers)
> in trailer_info.

> Signed-off-by: Linus Arver <linusa@google.com>
> ---
>  sequencer.c | 27 ++++++++++++---------------
>  1 file changed, 12 insertions(+), 15 deletions(-)

OK.  The code reduction primarily comes from the fact that the
original was manually iterating over the trailer lines that you can
get from the iterator API.

> +	while (trailer_iterator_advance(&iter)) {
> +		i++;
> +		if (sob && !strncmp(iter.raw, sob->buf, sob->len))
> +			found_sob = i;
> +	}
> +	trailer_iterator_release(&iter);
>  
> +	if (!i)
> +		return 0;
>  
> +	found_sob_last = (int)i == found_sob;

This is slightly harder to reason about, as we cannot directly say
"the collection being iterated over has .nr members, and what we
found was at the end" like the original could do in its loop.

> -	for (i = 0; i < info.trailer_nr; i++)
> -		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
> -			found_sob = 1;
> -			if (i == info.trailer_nr - 1)
> -				found_sob_last = 1;
> -		}

As 'i' is incremented before we set found_sob to it in the new loop,
when it is assigned in the loop, the value of found_sob will never
be zero.  It used to be that found_sob takes only 0 or 1, but
because we only care about found_sob and found_sob_last being
zero/non-zero in the remainder of the code, this does not affect the
correctness of the code.

Looking good.



^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v2 6/8] trailer: make parse_trailers() return trailer_info pointer
  2024-04-19  5:22   ` [PATCH v2 6/8] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
@ 2024-04-23 23:17     ` Junio C Hamano
  0 siblings, 0 replies; 67+ messages in thread
From: Junio C Hamano @ 2024-04-23 23:17 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> ... From the trailer API user's perspective, the call
> to trailer_info_new() can be replaced with parse_trailers(); do so in
> interpret-trailers.

And from the trailer API users' perspective, it would now help to
have a bit of comment on parse_trailers() function.  The users need
to know at least:

 - what the function returns (i.e. a pointer to an opaque
   trailer_info structure),

 - what operations can be done to the opaque structure, and 

 - what information can be extracted out of the opaque structure.

When appropriately typed and named members in the structure are
visible, the latter two are obvious in well written programs, but
now you are going to hide the data structure, "Show me your data
structures, and I won't usually need your code; it'll be obvious."
would no longer work.  Making the structure opaque robs the readers
of that clarity, so you'd need to compensate with good comments on
the function and the API to help them.

The changes in this patch look more-or-less obvious.  Looking good.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v2 7/8] trailer: make trailer_info struct private
  2024-04-19  5:22   ` [PATCH v2 7/8] trailer: make trailer_info struct private Linus Arver via GitGitGadget
@ 2024-04-23 23:27     ` Junio C Hamano
  2024-04-25  3:17       ` Linus Arver
  0 siblings, 1 reply; 67+ messages in thread
From: Junio C Hamano @ 2024-04-23 23:27 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> There are a couple disadvantages:
>
>   (A) every time the member of the struct is accessed an extra pointer
>       dereference must be done, and
>
>   (B) for users of trailer_info outside trailer.c, this struct can no
>       longer be allocated on the stack and may only be allocated on the
>       heap (because its definition is hidden away in trailer.c) and
>       appropriately deallocated by the user.

    (C) without good documentation on the API, the opaque struct is
        hostile to programmers by going opposite to "Show me your
        data structures, and I won't usually need your code; it'll
        be obvious." mantra.

The comment inside trailer.c does not count (the API users are not
supposed to peek in it---that's the whole point of making the
structure opaque).  You'd need to compensate with a bit more doc in
trailer.h to help the API users.

Other than that, looks "correct".

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v2 8/8] trailer: retire trailer_info_get() from API
  2024-04-19  5:22   ` [PATCH v2 8/8] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
@ 2024-04-23 23:27     ` Junio C Hamano
  0 siblings, 0 replies; 67+ messages in thread
From: Junio C Hamano @ 2024-04-23 23:27 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Linus Arver <linusa@google.com>
>
> Make trailer_info_get() "static" to be file-scoped to trailer.c, because
> no one outside of trailer.c uses it. Remove its declaration from
> <trailer.h>.

Yay.  But the callers still need to know what parse_trailers() gives
them, and what they can do with the struct.

Other than that, an obvious last step that concludes the series.
Nicely structured.


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v2 0/8] Make trailer_info struct private (plus sequencer cleanup)
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
                     ` (7 preceding siblings ...)
  2024-04-19  5:22   ` [PATCH v2 8/8] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
@ 2024-04-24  0:27   ` Junio C Hamano
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
  9 siblings, 0 replies; 67+ messages in thread
From: Junio C Hamano @ 2024-04-24  0:27 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> NOTE: This series is based on the la/format-trailer-info topic branch (see
> its discussion at [1]).
>
> This series is based on the initial series [2], notably the v4 version of
> patches 17-20 as suggested by Christian [3]. This version addresses the
> review comments for those patches, namely the splitting up of Patch 19 there
> into 3 separate patches [4] (as Patches 05-07 here).
>
> The central idea is to make the trailer_info struct private (that is, move
> its definition from trailer.h to trailer.c) --- aka the "pimpl" idiom. See
> the detailed commit message for Patch 07 for the motivation behind the
> change.
>
> Patch 04 makes sequencer.c a well-behaved trailer API consumer, by making
> use of the trailer iterator. Patch 03 prepares us for Patch 04. Patch 08
> slightly reduces the weight of the API by removing (from the API surface) an
> unused function.

As we haven't seen any interest or reviews to this series over its
two iterations, I took a look myself and it looked mostly OK to me.

So, I'll mark the topic for 'next' unless somebody objects (I really
was hoping that Christian would utter something on the topic as it
has been his area all along), but given that we'd be in pre-release
freeze for one more week, there is no need to rush.

Thanks.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v2 7/8] trailer: make trailer_info struct private
  2024-04-23 23:27     ` Junio C Hamano
@ 2024-04-25  3:17       ` Linus Arver
  0 siblings, 0 replies; 67+ messages in thread
From: Linus Arver @ 2024-04-25  3:17 UTC (permalink / raw)
  To: Junio C Hamano, Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk

Junio C Hamano <gitster@pobox.com> writes:

> "Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> There are a couple disadvantages:
>>
>>   (A) every time the member of the struct is accessed an extra pointer
>>       dereference must be done, and
>>
>>   (B) for users of trailer_info outside trailer.c, this struct can no
>>       longer be allocated on the stack and may only be allocated on the
>>       heap (because its definition is hidden away in trailer.c) and
>>       appropriately deallocated by the user.
>
>     (C) without good documentation on the API, the opaque struct is
>         hostile to programmers by going opposite to "Show me your
>         data structures, and I won't usually need your code; it'll
>         be obvious." mantra.
>
> The comment inside trailer.c does not count (the API users are not
> supposed to peek in it---that's the whole point of making the
> structure opaque).  You'd need to compensate with a bit more doc in
> trailer.h to help the API users.

SGTM. I can reroll again by the end of the week to add docs for would-be
API users. Cheers

^ permalink raw reply	[flat|nested] 67+ messages in thread

* [PATCH v3 00/10] Make trailer_info struct private (plus sequencer cleanup)
  2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
                     ` (8 preceding siblings ...)
  2024-04-24  0:27   ` [PATCH v2 0/8] Make trailer_info struct private (plus sequencer cleanup) Junio C Hamano
@ 2024-04-26  0:26   ` Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 01/10] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
                       ` (11 more replies)
  9 siblings, 12 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver

NOTE: This series is based on the la/format-trailer-info topic branch (see
its discussion at [1]).

This series is based on the initial series [2], notably the v4 version of
patches 17-20 as suggested by Christian [3]. This version addresses the
review comments for those patches, namely the splitting up of Patch 19 there
into 3 separate patches [4] (as Patches 05-07 here).

The central idea is to make the trailer_info struct private (that is, move
its definition from trailer.h to trailer.c) --- aka the "pimpl" idiom. See
the detailed commit message for Patch 07 for the motivation behind the
change.

Patch 04 makes sequencer.c a well-behaved trailer API consumer, by making
use of the trailer iterator. Patch 03 prepares us for Patch 04. Patch 08
slightly reduces the weight of the API by removing (from the API surface) an
unused function.


Notable changes in v3
=====================

 * (NEW Patch 10) Expand test coverage to check the contents of each
   iteration (raw, key, val fields), not just the total number of iterations
 * (NEW Patch 09) Add documentation in <trailer.h> for using
   parse_trailers()
 * (unrelated) I will lose access to my linusa@google.com email address
   tomorrow (I'm switching jobs!) and so future emails from me will come
   from linus@ucla.edu [5]. I've added the latter email to the CC list here
   so things should just work. Cheers


Notable changes in v2
=====================

 * Add unit tests at the beginning of the series (Patches 01 and 02) and use
   it to verify that the other edge cases remain unchanged when we add the
   "raw" member (Patch 03)

[1]
https://lore.kernel.org/git/pull.1694.git.1710485706.gitgitgadget@gmail.com/
[2]
https://lore.kernel.org/git/pull.1632.v4.git.1707196348.gitgitgadget@gmail.com/
[3]
https://lore.kernel.org/git/CAP8UFD08F0V13X0+CJ1uhMPzPWVMs2okGVMJch0DkQg5M3BWLA@mail.gmail.com/
[4]
https://lore.kernel.org/git/CAP8UFD1twELGKvvesxgCrZrypKZpgSt04ira3mvurG1UbpDfxQ@mail.gmail.com/
[5]
https://lore.kernel.org/git/pull.1720.git.1713309711217.gitgitgadget@gmail.com/

Linus Arver (10):
  Makefile: sort UNIT_TEST_PROGRAMS
  trailer: add unit tests for trailer iterator
  trailer: teach iterator about non-trailer lines
  sequencer: use the trailer iterator
  interpret-trailers: access trailer_info with new helpers
  trailer: make parse_trailers() return trailer_info pointer
  trailer: make trailer_info struct private
  trailer: retire trailer_info_get() from API
  trailer: document parse_trailers() usage
  trailer unit tests: inspect iterator contents

 Makefile                     |   5 +-
 builtin/interpret-trailers.c |  12 +-
 sequencer.c                  |  27 ++-
 t/unit-tests/t-trailer.c     | 315 +++++++++++++++++++++++++++++++++++
 trailer.c                    | 167 ++++++++++++-------
 trailer.h                    |  95 +++++++----
 6 files changed, 507 insertions(+), 114 deletions(-)
 create mode 100644 t/unit-tests/t-trailer.c


base-commit: 3452d173241c8b87ecdd67f91f594cb14327e394
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1696%2Flistx%2Ftrailer-api-part-3-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1696/listx/trailer-api-part-3-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/1696

Range-diff vs v2:

  1:  b6a1304f8ae =  1:  b6a1304f8ae Makefile: sort UNIT_TEST_PROGRAMS
  2:  e1fa05143ac !  2:  4ad0fbbb33c trailer: add unit tests for trailer iterator
     @@ t/unit-tests/t-trailer.c (new)
      +	size_t i = 0;
      +
      +	trailer_iterator_init(&iter, msg);
     -+	while (trailer_iterator_advance(&iter)) {
     ++	while (trailer_iterator_advance(&iter))
      +		i++;
     -+	}
      +	trailer_iterator_release(&iter);
      +
      +	check_uint(i, ==, num_expected_trailers);
     @@ t/unit-tests/t-trailer.c (new)
      +			/*
      +			 * Even though this trailer block has a non-trailer line
      +			 * in it, it's still a valid trailer block because it's
     -+			 * at least 25% trailers and is Git-generated.
     ++			 * at least 25% trailers and is Git-generated (see
     ++			 * git_generated_prefixes[] in trailer.c).
      +			 */
      +			"not a trailer line\n"
      +			"not a trailer line\n"
     @@ t/unit-tests/t-trailer.c (new)
      +			"\n"
      +			/*
      +			 * This block has only 1 non-trailer out of 10 (IOW, 90%
     -+			 * trailers) but is not considered a trailer because the
     -+			 * 25% threshold only applies to cases where there was a
     -+			 * Git-generated trailer (see git_generated_prefixes[]
     -+			 * in trailer.c).
     ++			 * trailers) but is not considered a trailer block
     ++			 * because the 25% threshold only applies to cases where
     ++			 * there was a Git-generated trailer.
      +			 */
      +			"Reviewed-by: x\n"
      +			"Reviewed-by: x\n"
  3:  5520a98e296 !  3:  9077d5a315d trailer: teach iterator about non-trailer lines
     @@ Commit message
          for non-trailer lines, making the comparison still work even with this
          commit).
      
     +    Rename "num_expected_trailers" to "num_expected_objects" in
     +    t/unit-tests/t-trailer.c because the items we iterate over now include
     +    non-trailer lines.
     +
          Signed-off-by: Linus Arver <linusa@google.com>
      
       ## t/unit-tests/t-trailer.c ##
     +@@
     + #include "test-lib.h"
     + #include "trailer.h"
     + 
     +-static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
     ++static void t_trailer_iterator(const char *msg, size_t num_expected_objects)
     + {
     + 	struct trailer_iterator iter;
     + 	size_t i = 0;
     +@@ t/unit-tests/t-trailer.c: static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
     + 		i++;
     + 	trailer_iterator_release(&iter);
     + 
     +-	check_uint(i, ==, num_expected_trailers);
     ++	check_uint(i, ==, num_expected_objects);
     + }
     + 
     + static void run_t_trailer_iterator(void)
     +@@ t/unit-tests/t-trailer.c: static void run_t_trailer_iterator(void)
     + 	static struct test_cases {
     + 		const char *name;
     + 		const char *msg;
     +-		size_t num_expected_trailers;
     ++		size_t num_expected_objects;
     + 	} tc[] = {
     + 		{
     + 			"empty input",
      @@ t/unit-tests/t-trailer.c: static void run_t_trailer_iterator(void)
       			"not a trailer line\n"
       			"not a trailer line\n"
     @@ t/unit-tests/t-trailer.c: static void run_t_trailer_iterator(void)
       		},
       		{
       			"with non-trailer lines (one too many) in trailer block",
     +@@ t/unit-tests/t-trailer.c: static void run_t_trailer_iterator(void)
     + 
     + 	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
     + 		TEST(t_trailer_iterator(tc[i].msg,
     +-					tc[i].num_expected_trailers),
     ++					tc[i].num_expected_objects),
     + 		     "%s", tc[i].name);
     + 	}
     + }
      
       ## trailer.c ##
      @@ trailer.c: void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
  4:  84897cf5c83 =  4:  4a1d18da574 sequencer: use the trailer iterator
  5:  e961d49cd40 =  5:  460979ba964 interpret-trailers: access trailer_info with new helpers
  6:  093f68f3658 =  6:  d217858c637 trailer: make parse_trailers() return trailer_info pointer
  7:  0e9ae049b88 !  7:  49c66c48cc1 trailer: make trailer_info struct private
     @@ Commit message
            (2) external API users are unable to peer inside this struct (because
                it is only ever exposed as an opaque pointer).
      
     -    There are a couple disadvantages:
     +    There are a few disadvantages:
      
            (A) every time the member of the struct is accessed an extra pointer
                dereference must be done, and
     @@ Commit message
            (B) for users of trailer_info outside trailer.c, this struct can no
                longer be allocated on the stack and may only be allocated on the
                heap (because its definition is hidden away in trailer.c) and
     -          appropriately deallocated by the user.
     +          appropriately deallocated by the user, and
     +
     +      (C) without good documentation on the API, the opaque struct is
     +          hostile to programmers by going opposite to the "Show me your
     +          data structures, and I won't usually need your code; it'll
     +          be obvious." mantra [2].
      
          (The disadvantages have already been observed in the two preparatory
          commits that precede this one.) This commit believes that the benefits
     @@ Commit message
          [1] Hanson, David R. "C Interfaces and Implementations: Techniques for
              Creating Reusable Software". Addison Wesley, 1997. p. 22
      
     +    [2] Raymond, Eric S. "The Cathedral and the Bazaar: Musings on Linux and
     +        Open Source by an Accidental Revolutionary". O'Reilly, 1999.
     +
     +    Helped-by: Junio C Hamano <gitster@pobox.com>
          Helped-by: Christian Couder <chriscool@tuxfamily.org>
          Signed-off-by: Linus Arver <linusa@google.com>
      
  8:  eca77a1a462 =  8:  56e1cca4b7b trailer: retire trailer_info_get() from API
  -:  ----------- >  9:  35304837e08 trailer: document parse_trailers() usage
  -:  ----------- > 10:  4d53707f836 trailer unit tests: inspect iterator contents

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 67+ messages in thread

* [PATCH v3 01/10] Makefile: sort UNIT_TEST_PROGRAMS
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
@ 2024-04-26  0:26     ` Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 02/10] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
                       ` (10 subsequent siblings)
  11 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Signed-off-by: Linus Arver <linusa@google.com>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 4e255c81f22..d3a3f16f076 100644
--- a/Makefile
+++ b/Makefile
@@ -1343,10 +1343,10 @@ THIRD_PARTY_SOURCES += sha1collisiondetection/%
 THIRD_PARTY_SOURCES += sha1dc/%
 
 UNIT_TEST_PROGRAMS += t-basic
-UNIT_TEST_PROGRAMS += t-mem-pool
-UNIT_TEST_PROGRAMS += t-strbuf
 UNIT_TEST_PROGRAMS += t-ctype
+UNIT_TEST_PROGRAMS += t-mem-pool
 UNIT_TEST_PROGRAMS += t-prio-queue
+UNIT_TEST_PROGRAMS += t-strbuf
 UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v3 02/10] trailer: add unit tests for trailer iterator
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 01/10] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
@ 2024-04-26  0:26     ` Linus Arver via GitGitGadget
  2024-04-26 14:51       ` Christian Couder
  2024-04-26  0:26     ` [PATCH v3 03/10] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
                       ` (9 subsequent siblings)
  11 siblings, 1 reply; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Test the number of trailers found by the iterator (to be more precise,
the parsing mechanism which the iterator just walks over) when given
some arbitrary log message.

We test the iterator because it is a public interface function exposed
by the trailer API (we generally don't want to test internal
implementation details which are, unlike the API, subject to drastic
changes).

Signed-off-by: Linus Arver <linusa@google.com>
---
 Makefile                 |   1 +
 t/unit-tests/t-trailer.c | 174 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 175 insertions(+)
 create mode 100644 t/unit-tests/t-trailer.c

diff --git a/Makefile b/Makefile
index d3a3f16f076..5418ddd03be 100644
--- a/Makefile
+++ b/Makefile
@@ -1347,6 +1347,7 @@ UNIT_TEST_PROGRAMS += t-ctype
 UNIT_TEST_PROGRAMS += t-mem-pool
 UNIT_TEST_PROGRAMS += t-prio-queue
 UNIT_TEST_PROGRAMS += t-strbuf
+UNIT_TEST_PROGRAMS += t-trailer
 UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o
diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
new file mode 100644
index 00000000000..c1f897235c7
--- /dev/null
+++ b/t/unit-tests/t-trailer.c
@@ -0,0 +1,174 @@
+#include "test-lib.h"
+#include "trailer.h"
+
+static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
+{
+	struct trailer_iterator iter;
+	size_t i = 0;
+
+	trailer_iterator_init(&iter, msg);
+	while (trailer_iterator_advance(&iter))
+		i++;
+	trailer_iterator_release(&iter);
+
+	check_uint(i, ==, num_expected_trailers);
+}
+
+static void run_t_trailer_iterator(void)
+{
+	static struct test_cases {
+		const char *name;
+		const char *msg;
+		size_t num_expected_trailers;
+	} tc[] = {
+		{
+			"empty input",
+			"",
+			0
+		},
+		{
+			"no newline at beginning",
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n",
+			0
+		},
+		{
+			"newline at beginning",
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n",
+			3
+		},
+		{
+			"without body text",
+			"subject: foo bar\n"
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n",
+			3
+		},
+		{
+			"with body text, without divider",
+			"my subject\n"
+			"\n"
+			"my body which is long\n"
+			"and contains some special\n"
+			"chars like : = ? !\n"
+			"hello\n"
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n"
+			"Signed-off-by: x\n",
+			4
+		},
+		{
+			"with body text, without divider (second trailer block)",
+			"my subject\n"
+			"\n"
+			"my body which is long\n"
+			"and contains some special\n"
+			"chars like : = ? !\n"
+			"hello\n"
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n"
+			"Signed-off-by: x\n"
+			"\n"
+			/*
+			 * Because this is the last trailer block, it takes
+			 * precedence over the first one encountered above.
+			 */
+			"Helped-by: x\n"
+			"Signed-off-by: x\n",
+			2
+		},
+		{
+			"with body text, with divider",
+			"my subject\n"
+			"\n"
+			"my body which is long\n"
+			"and contains some special\n"
+			"chars like : = ? !\n"
+			"hello\n"
+			"\n"
+			"---\n"
+			"\n"
+			/*
+			 * This trailer still counts because the iterator
+			 * always ignores the divider.
+			 */
+			"Signed-off-by: x\n",
+			1
+		},
+		{
+			"with non-trailer lines in trailer block",
+			"subject: foo bar\n"
+			"\n"
+			/*
+			 * Even though this trailer block has a non-trailer line
+			 * in it, it's still a valid trailer block because it's
+			 * at least 25% trailers and is Git-generated (see
+			 * git_generated_prefixes[] in trailer.c).
+			 */
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"Signed-off-by: x\n",
+			1
+		},
+		{
+			"with non-trailer lines (one too many) in trailer block",
+			"subject: foo bar\n"
+			"\n"
+			/*
+			 * This block has only 20% trailers, so it's below the
+			 * 25% threshold.
+			 */
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"Signed-off-by: x\n",
+			0
+		},
+		{
+			"with non-trailer lines (only 1) in trailer block, but no Git-generated trailers",
+			"subject: foo bar\n"
+			"\n"
+			/*
+			 * This block has only 1 non-trailer out of 10 (IOW, 90%
+			 * trailers) but is not considered a trailer block
+			 * because the 25% threshold only applies to cases where
+			 * there was a Git-generated trailer.
+			 */
+			"Reviewed-by: x\n"
+			"Reviewed-by: x\n"
+			"Reviewed-by: x\n"
+			"Helped-by: x\n"
+			"Helped-by: x\n"
+			"Helped-by: x\n"
+			"Acked-by: x\n"
+			"Acked-by: x\n"
+			"Acked-by: x\n"
+			"not a trailer line\n",
+			0
+		},
+	};
+
+	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
+		TEST(t_trailer_iterator(tc[i].msg,
+					tc[i].num_expected_trailers),
+		     "%s", tc[i].name);
+	}
+}
+
+int cmd_main(int argc, const char **argv)
+{
+	run_t_trailer_iterator();
+	return test_done();
+}
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v3 03/10] trailer: teach iterator about non-trailer lines
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 01/10] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 02/10] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
@ 2024-04-26  0:26     ` Linus Arver via GitGitGadget
  2024-04-27 12:50       ` Christian Couder
  2024-04-26  0:26     ` [PATCH v3 04/10] sequencer: use the trailer iterator Linus Arver via GitGitGadget
                       ` (8 subsequent siblings)
  11 siblings, 1 reply; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Previously the iterator did not iterate over non-trailer lines. This was
somewhat unfortunate, because trailer blocks could have non-trailer
lines in them since 146245063e (trailer: allow non-trailers in trailer
block, 2016-10-21), which was before the iterator was created in
f0939a0eb1 (trailer: add interface for iterating over commit trailers,
2020-09-27).

So if trailer API users wanted to iterate over all lines in a trailer
block (including non-trailer lines), they could not use the iterator and
were forced to use the lower-level trailer_info struct directly (which
provides a raw string array that includes all lines in the trailer
block).

Change the iterator's behavior so that we also iterate over non-trailer
lines, instead of skipping over them. The new "raw" member of the
iterator allows API users to access previously inaccessible non-trailer
lines. Rename the variable "trailer" to just "line" because this
variable can now hold both trailer lines _and_ non-trailer lines.

The new "raw" member is important because anyone currently not using the
iterator is using trailer_info's raw string array directly to access
lines to check what the combined key + value looks like. If we didn't
provide a "raw" member here, iterator users would have to re-construct
the unparsed line by concatenating the key and value back together again
--- which places an undue burden on iterator users.

The next commit demonstrates the use of the iterator in sequencer.c as an
example of where "raw" will be useful, so that it can start using the
iterator.

For the existing use of the iterator in builtin/shortlog.c, we don't
have to change the code there because that code does

    trailer_iterator_init(&iter, body);
    while (trailer_iterator_advance(&iter)) {
        const char *value = iter.val.buf;

        if (!string_list_has_string(&log->trailers, iter.key.buf))
            continue;

        ...

and the

        if (!string_list_has_string(&log->trailers, iter.key.buf))

condition already skips over non-trailer lines (iter.key.buf is empty
for non-trailer lines, making the comparison still work even with this
commit).

Rename "num_expected_trailers" to "num_expected_objects" in
t/unit-tests/t-trailer.c because the items we iterate over now include
non-trailer lines.

Signed-off-by: Linus Arver <linusa@google.com>
---
 t/unit-tests/t-trailer.c | 16 +++++++++++-----
 trailer.c                | 12 +++++-------
 trailer.h                |  8 ++++++++
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
index c1f897235c7..262e2838273 100644
--- a/t/unit-tests/t-trailer.c
+++ b/t/unit-tests/t-trailer.c
@@ -1,7 +1,7 @@
 #include "test-lib.h"
 #include "trailer.h"
 
-static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
+static void t_trailer_iterator(const char *msg, size_t num_expected_objects)
 {
 	struct trailer_iterator iter;
 	size_t i = 0;
@@ -11,7 +11,7 @@ static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
 		i++;
 	trailer_iterator_release(&iter);
 
-	check_uint(i, ==, num_expected_trailers);
+	check_uint(i, ==, num_expected_objects);
 }
 
 static void run_t_trailer_iterator(void)
@@ -19,7 +19,7 @@ static void run_t_trailer_iterator(void)
 	static struct test_cases {
 		const char *name;
 		const char *msg;
-		size_t num_expected_trailers;
+		size_t num_expected_objects;
 	} tc[] = {
 		{
 			"empty input",
@@ -119,7 +119,13 @@ static void run_t_trailer_iterator(void)
 			"not a trailer line\n"
 			"not a trailer line\n"
 			"Signed-off-by: x\n",
-			1
+			/*
+			 * Even though there is only really 1 real "trailer"
+			 * (Signed-off-by), we still have 4 trailer objects
+			 * because we still want to iterate through the entire
+			 * block.
+			 */
+			4
 		},
 		{
 			"with non-trailer lines (one too many) in trailer block",
@@ -162,7 +168,7 @@ static void run_t_trailer_iterator(void)
 
 	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
 		TEST(t_trailer_iterator(tc[i].msg,
-					tc[i].num_expected_trailers),
+					tc[i].num_expected_objects),
 		     "%s", tc[i].name);
 	}
 }
diff --git a/trailer.c b/trailer.c
index 3e4dab9c065..4700c441442 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1146,17 +1146,15 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	while (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
-		int separator_pos = find_separator(trailer, separators);
-
-		if (separator_pos < 1)
-			continue; /* not a real trailer */
+	if (iter->internal.cur < iter->internal.info.trailer_nr) {
+		char *line = iter->internal.info.trailers[iter->internal.cur++];
+		int separator_pos = find_separator(line, separators);
 
+		iter->raw = line;
 		strbuf_reset(&iter->key);
 		strbuf_reset(&iter->val);
 		parse_trailer(&iter->key, &iter->val, NULL,
-			      trailer, separator_pos);
+			      line, separator_pos);
 		/* Always unfold values during iteration. */
 		unfold_value(&iter->val);
 		return 1;
diff --git a/trailer.h b/trailer.h
index 9f42aa75994..ebafa3657e4 100644
--- a/trailer.h
+++ b/trailer.h
@@ -125,6 +125,14 @@ void format_trailers_from_commit(const struct process_trailer_options *,
  *   trailer_iterator_release(&iter);
  */
 struct trailer_iterator {
+	/*
+	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
+	 * key/val pair as part of a trailer block. A trailer block can be
+	 * either 100% trailer lines, or mixed in with non-trailer lines (in
+	 * which case at least 25% must be trailer lines).
+	 */
+	const char *raw;
+
 	struct strbuf key;
 	struct strbuf val;
 
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v3 04/10] sequencer: use the trailer iterator
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
                       ` (2 preceding siblings ...)
  2024-04-26  0:26     ` [PATCH v3 03/10] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
@ 2024-04-26  0:26     ` Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 05/10] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
                       ` (7 subsequent siblings)
  11 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Instead of calling "trailer_info_get()", which is a low-level function
in the trailers implementation (trailer.c), call
trailer_iterator_advance(), which was specifically designed for public
consumption in f0939a0eb1 (trailer: add interface for iterating over
commit trailers, 2020-09-27).

Avoiding "trailer_info_get()" means we don't have to worry about options
like "no_divider" (relevant for parsing trailers). We also don't have to
check for things like "info.trailer_start == info.trailer_end" to see
whether there were any trailers (instead we can just check to see
whether the iterator advanced at all).

Note how we have to use "iter.raw" in order to get the same behavior as
before when we iterated over the unparsed string array (char **trailers)
in trailer_info.

Signed-off-by: Linus Arver <linusa@google.com>
---
 sequencer.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/sequencer.c b/sequencer.c
index ea1441e6174..4c1f6c675e7 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -319,35 +319,32 @@ static const char *get_todo_path(const struct replay_opts *opts)
 static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob,
 	size_t ignore_footer)
 {
-	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
-	struct trailer_info info;
-	size_t i;
+	struct trailer_iterator iter;
+	size_t i = 0;
 	int found_sob = 0, found_sob_last = 0;
 	char saved_char;
 
-	opts.no_divider = 1;
-
 	if (ignore_footer) {
 		saved_char = sb->buf[sb->len - ignore_footer];
 		sb->buf[sb->len - ignore_footer] = '\0';
 	}
 
-	trailer_info_get(&opts, sb->buf, &info);
+	trailer_iterator_init(&iter, sb->buf);
 
 	if (ignore_footer)
 		sb->buf[sb->len - ignore_footer] = saved_char;
 
-	if (info.trailer_block_start == info.trailer_block_end)
-		return 0;
+	while (trailer_iterator_advance(&iter)) {
+		i++;
+		if (sob && !strncmp(iter.raw, sob->buf, sob->len))
+			found_sob = i;
+	}
+	trailer_iterator_release(&iter);
 
-	for (i = 0; i < info.trailer_nr; i++)
-		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
-			found_sob = 1;
-			if (i == info.trailer_nr - 1)
-				found_sob_last = 1;
-		}
+	if (!i)
+		return 0;
 
-	trailer_info_release(&info);
+	found_sob_last = (int)i == found_sob;
 
 	if (found_sob_last)
 		return 3;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v3 05/10] interpret-trailers: access trailer_info with new helpers
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
                       ` (3 preceding siblings ...)
  2024-04-26  0:26     ` [PATCH v3 04/10] sequencer: use the trailer iterator Linus Arver via GitGitGadget
@ 2024-04-26  0:26     ` Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 06/10] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
                       ` (6 subsequent siblings)
  11 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Instead of directly accessing trailer_info members, access them
indirectly through new helper functions exposed by the trailer API.

This is the first of two preparatory commits which will allow us to
use the so-called "pimpl" (pointer to implementation) idiom for the
trailer API, by making the trailer_info struct private to the trailer
implementation (and thus hidden from the API).

Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linusa@google.com>
---
 builtin/interpret-trailers.c | 12 ++++++------
 trailer.c                    | 21 +++++++++++++++++++++
 trailer.h                    |  4 ++++
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c
index 11f4ce9e4a2..f3240682e35 100644
--- a/builtin/interpret-trailers.c
+++ b/builtin/interpret-trailers.c
@@ -141,7 +141,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	LIST_HEAD(head);
 	struct strbuf sb = STRBUF_INIT;
 	struct strbuf trailer_block = STRBUF_INIT;
-	struct trailer_info info;
+	struct trailer_info *info = trailer_info_new();
 	FILE *outfile = stdout;
 
 	trailer_config_init();
@@ -151,13 +151,13 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	if (opts->in_place)
 		outfile = create_in_place_tempfile(file);
 
-	parse_trailers(opts, &info, sb.buf, &head);
+	parse_trailers(opts, info, sb.buf, &head);
 
 	/* Print the lines before the trailers */
 	if (!opts->only_trailers)
-		fwrite(sb.buf, 1, info.trailer_block_start, outfile);
+		fwrite(sb.buf, 1, trailer_block_start(info), outfile);
 
-	if (!opts->only_trailers && !info.blank_line_before_trailer)
+	if (!opts->only_trailers && !blank_line_before_trailer_block(info))
 		fprintf(outfile, "\n");
 
 
@@ -178,8 +178,8 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 
 	/* Print the lines after the trailers as is */
 	if (!opts->only_trailers)
-		fwrite(sb.buf + info.trailer_block_end, 1, sb.len - info.trailer_block_end, outfile);
-	trailer_info_release(&info);
+		fwrite(sb.buf + trailer_block_end(info), 1, sb.len - trailer_block_end(info), outfile);
+	trailer_info_release(info);
 
 	if (opts->in_place)
 		if (rename_tempfile(&trailers_tempfile, file))
diff --git a/trailer.c b/trailer.c
index 4700c441442..95b4c9b8f19 100644
--- a/trailer.c
+++ b/trailer.c
@@ -952,6 +952,12 @@ static void unfold_value(struct strbuf *val)
 	strbuf_release(&out);
 }
 
+struct trailer_info *trailer_info_new(void)
+{
+	struct trailer_info *info = xcalloc(1, sizeof(*info));
+	return info;
+}
+
 /*
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
@@ -1000,6 +1006,21 @@ void free_trailers(struct list_head *trailers)
 	}
 }
 
+size_t trailer_block_start(struct trailer_info *info)
+{
+	return info->trailer_block_start;
+}
+
+size_t trailer_block_end(struct trailer_info *info)
+{
+	return info->trailer_block_end;
+}
+
+int blank_line_before_trailer_block(struct trailer_info *info)
+{
+	return info->blank_line_before_trailer;
+}
+
 void trailer_info_get(const struct process_trailer_options *opts,
 		      const char *str,
 		      struct trailer_info *info)
diff --git a/trailer.h b/trailer.h
index ebafa3657e4..9ac4be853c5 100644
--- a/trailer.h
+++ b/trailer.h
@@ -97,6 +97,10 @@ void parse_trailers(const struct process_trailer_options *,
 void trailer_info_get(const struct process_trailer_options *,
 		      const char *str,
 		      struct trailer_info *);
+size_t trailer_block_start(struct trailer_info *);
+size_t trailer_block_end(struct trailer_info *);
+int blank_line_before_trailer_block(struct trailer_info *);
+struct trailer_info *trailer_info_new(void);
 
 void trailer_info_release(struct trailer_info *info);
 
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v3 06/10] trailer: make parse_trailers() return trailer_info pointer
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
                       ` (4 preceding siblings ...)
  2024-04-26  0:26     ` [PATCH v3 05/10] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
@ 2024-04-26  0:26     ` Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 07/10] trailer: make trailer_info struct private Linus Arver via GitGitGadget
                       ` (5 subsequent siblings)
  11 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

This is the second and final preparatory commit for making the
trailer_info struct private to the trailer implementation.

Make trailer_info_get() do the actual work of allocating a new
trailer_info struct, and return a pointer to it. Because
parse_trailers() wraps around trailer_info_get(), it too can return this
pointer to the caller. From the trailer API user's perspective, the call
to trailer_info_new() can be replaced with parse_trailers(); do so in
interpret-trailers.

Because trailer_info_new() is no longer called by interpret-trailers,
remove this function from the trailer API.

With this change, we no longer allocate trailer_info on the stack ---
all uses of it are via a pointer where the actual data is always
allocated at runtime through trailer_info_new(). Make
trailer_info_release() free this dynamically allocated memory.

Finally, due to the way the function signatures of parse_trailers() and
trailer_info_get() have changed, update the callsites in
format_trailers_from_commit() and trailer_iterator_init() accordingly.

Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linusa@google.com>
---
 builtin/interpret-trailers.c |  4 ++--
 trailer.c                    | 41 +++++++++++++++++++-----------------
 trailer.h                    | 17 ++++++---------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c
index f3240682e35..6bf8cec005a 100644
--- a/builtin/interpret-trailers.c
+++ b/builtin/interpret-trailers.c
@@ -141,7 +141,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	LIST_HEAD(head);
 	struct strbuf sb = STRBUF_INIT;
 	struct strbuf trailer_block = STRBUF_INIT;
-	struct trailer_info *info = trailer_info_new();
+	struct trailer_info *info;
 	FILE *outfile = stdout;
 
 	trailer_config_init();
@@ -151,7 +151,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	if (opts->in_place)
 		outfile = create_in_place_tempfile(file);
 
-	parse_trailers(opts, info, sb.buf, &head);
+	info = parse_trailers(opts, sb.buf, &head);
 
 	/* Print the lines before the trailers */
 	if (!opts->only_trailers)
diff --git a/trailer.c b/trailer.c
index 95b4c9b8f19..9179dd802c6 100644
--- a/trailer.c
+++ b/trailer.c
@@ -952,7 +952,7 @@ static void unfold_value(struct strbuf *val)
 	strbuf_release(&out);
 }
 
-struct trailer_info *trailer_info_new(void)
+static struct trailer_info *trailer_info_new(void)
 {
 	struct trailer_info *info = xcalloc(1, sizeof(*info));
 	return info;
@@ -962,16 +962,16 @@ struct trailer_info *trailer_info_new(void)
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
  */
-void parse_trailers(const struct process_trailer_options *opts,
-		    struct trailer_info *info,
-		    const char *str,
-		    struct list_head *head)
+struct trailer_info *parse_trailers(const struct process_trailer_options *opts,
+				    const char *str,
+				    struct list_head *head)
 {
+	struct trailer_info *info;
 	struct strbuf tok = STRBUF_INIT;
 	struct strbuf val = STRBUF_INIT;
 	size_t i;
 
-	trailer_info_get(opts, str, info);
+	info = trailer_info_get(opts, str);
 
 	for (i = 0; i < info->trailer_nr; i++) {
 		int separator_pos;
@@ -995,6 +995,8 @@ void parse_trailers(const struct process_trailer_options *opts,
 					 strbuf_detach(&val, NULL));
 		}
 	}
+
+	return info;
 }
 
 void free_trailers(struct list_head *trailers)
@@ -1021,10 +1023,10 @@ int blank_line_before_trailer_block(struct trailer_info *info)
 	return info->blank_line_before_trailer;
 }
 
-void trailer_info_get(const struct process_trailer_options *opts,
-		      const char *str,
-		      struct trailer_info *info)
+struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
+				      const char *str)
 {
+	struct trailer_info *info = trailer_info_new();
 	size_t end_of_log_message = 0, trailer_block_start = 0;
 	struct strbuf **trailer_lines, **ptr;
 	char **trailer_strings = NULL;
@@ -1063,6 +1065,8 @@ void trailer_info_get(const struct process_trailer_options *opts,
 	info->trailer_block_end = end_of_log_message;
 	info->trailers = trailer_strings;
 	info->trailer_nr = nr;
+
+	return info;
 }
 
 void trailer_info_release(struct trailer_info *info)
@@ -1071,6 +1075,7 @@ void trailer_info_release(struct trailer_info *info)
 	for (i = 0; i < info->trailer_nr; i++)
 		free(info->trailers[i]);
 	free(info->trailers);
+	free(info);
 }
 
 void format_trailers(const struct process_trailer_options *opts,
@@ -1138,21 +1143,19 @@ void format_trailers_from_commit(const struct process_trailer_options *opts,
 				 struct strbuf *out)
 {
 	LIST_HEAD(trailer_objects);
-	struct trailer_info info;
-
-	parse_trailers(opts, &info, msg, &trailer_objects);
+	struct trailer_info *info = parse_trailers(opts, msg, &trailer_objects);
 
 	/* If we want the whole block untouched, we can take the fast path. */
 	if (!opts->only_trailers && !opts->unfold && !opts->filter &&
 	    !opts->separator && !opts->key_only && !opts->value_only &&
 	    !opts->key_value_separator) {
-		strbuf_add(out, msg + info.trailer_block_start,
-			   info.trailer_block_end - info.trailer_block_start);
+		strbuf_add(out, msg + info->trailer_block_start,
+			   info->trailer_block_end - info->trailer_block_start);
 	} else
 		format_trailers(opts, &trailer_objects, out);
 
 	free_trailers(&trailer_objects);
-	trailer_info_release(&info);
+	trailer_info_release(info);
 }
 
 void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
@@ -1161,14 +1164,14 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 	strbuf_init(&iter->key, 0);
 	strbuf_init(&iter->val, 0);
 	opts.no_divider = 1;
-	trailer_info_get(&opts, msg, &iter->internal.info);
+	iter->internal.info = trailer_info_get(&opts, msg);
 	iter->internal.cur = 0;
 }
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	if (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *line = iter->internal.info.trailers[iter->internal.cur++];
+	if (iter->internal.cur < iter->internal.info->trailer_nr) {
+		char *line = iter->internal.info->trailers[iter->internal.cur++];
 		int separator_pos = find_separator(line, separators);
 
 		iter->raw = line;
@@ -1185,7 +1188,7 @@ int trailer_iterator_advance(struct trailer_iterator *iter)
 
 void trailer_iterator_release(struct trailer_iterator *iter)
 {
-	trailer_info_release(&iter->internal.info);
+	trailer_info_release(iter->internal.info);
 	strbuf_release(&iter->val);
 	strbuf_release(&iter->key);
 }
diff --git a/trailer.h b/trailer.h
index 9ac4be853c5..b32213a9e23 100644
--- a/trailer.h
+++ b/trailer.h
@@ -89,18 +89,15 @@ void parse_trailers_from_command_line_args(struct list_head *arg_head,
 void process_trailers_lists(struct list_head *head,
 			    struct list_head *arg_head);
 
-void parse_trailers(const struct process_trailer_options *,
-		    struct trailer_info *,
-		    const char *str,
-		    struct list_head *head);
-
-void trailer_info_get(const struct process_trailer_options *,
-		      const char *str,
-		      struct trailer_info *);
+struct trailer_info *parse_trailers(const struct process_trailer_options *,
+				    const char *str,
+				    struct list_head *head);
+struct trailer_info *trailer_info_get(const struct process_trailer_options *,
+				      const char *str);
+
 size_t trailer_block_start(struct trailer_info *);
 size_t trailer_block_end(struct trailer_info *);
 int blank_line_before_trailer_block(struct trailer_info *);
-struct trailer_info *trailer_info_new(void);
 
 void trailer_info_release(struct trailer_info *info);
 
@@ -142,7 +139,7 @@ struct trailer_iterator {
 
 	/* private */
 	struct {
-		struct trailer_info info;
+		struct trailer_info *info;
 		size_t cur;
 	} internal;
 };
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v3 07/10] trailer: make trailer_info struct private
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
                       ` (5 preceding siblings ...)
  2024-04-26  0:26     ` [PATCH v3 06/10] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
@ 2024-04-26  0:26     ` Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 08/10] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
                       ` (4 subsequent siblings)
  11 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

In 13211ae23f (trailer: separate public from internal portion of
trailer_iterator, 2023-09-09) we moved trailer_info behind an anonymous
struct to discourage use by trailer.h API users. However, it still left
open the possibility of external use of trailer_info itself. Now that
there are no external users of trailer_info, we can make this struct
private.

Make this struct private by putting its definition inside trailer.c.
This has two benefits:

  (1) it makes the surface area of the public facing
      interface (trailer.h) smaller, and

  (2) external API users are unable to peer inside this struct (because
      it is only ever exposed as an opaque pointer).

There are a few disadvantages:

  (A) every time a member of the struct is accessed, an extra pointer
      dereference must be done,

  (B) for users of trailer_info outside trailer.c, this struct can no
      longer be allocated on the stack and may only be allocated on the
      heap (because its definition is hidden away in trailer.c) and
      appropriately deallocated by the user, and

  (C) without good documentation on the API, the opaque struct is
      hostile to programmers by going opposite to the "Show me your
      data structures, and I won't usually need your code; it'll
      be obvious." mantra [2].

(The disadvantages have already been observed in the two preparatory
commits that precede this one.) This commit believes that the benefits
outweigh the disadvantages for designing APIs, as explained below.

Making trailer_info private exposes existing deficiencies in the API.
This is because users of this struct had full access to its internals,
so there wasn't much need to actually design it to be "complete" in the
sense that API users only needed to use what was provided by the API.
For example, the location of the trailer block (start/end offsets
relative to the start of the input text) was accessible by looking at
these struct members directly. Now that the struct is private, we have
to expose new API functions to allow clients to access this
information (see builtin/interpret-trailers.c).

The idea in this commit to hide implementation details behind an "opaque
pointer" is also known as the "pimpl" (pointer to implementation) idiom
in C++ and is a common pattern in that language (where, for example,
abstract classes only have pointers to concrete classes).

However, the original inspiration to use this idiom does not come from
C++, but instead the book "C Interfaces and Implementations: Techniques
for Creating Reusable Software" [1]. This book recommends opaque
pointers as a good design principle for designing C libraries, using the
term "interface" for the functions declared in *.h (header) files and
"implementation" for the corresponding *.c files which define those
functions.

The book says this about opaque pointers:

    ... clients can manipulate such pointers freely, but they can’t
    dereference them; that is, they can’t look at the innards of the
    structure pointed to by them. Only the implementation has that
    privilege. Opaque pointers hide representation details and help
    catch errors.

In our case, "struct trailer_info" is now hidden from clients, and the
ways in which this opaque pointer can be used are limited to the richness
of <trailer.h>. In other words, <trailer.h> exclusively controls exactly
how "trailer_info" pointers are to be used.

[1] Hanson, David R. "C Interfaces and Implementations: Techniques for
    Creating Reusable Software". Addison Wesley, 1997. p. 22

[2] Raymond, Eric S. "The Cathedral and the Bazaar: Musings on Linux and
    Open Source by an Accidental Revolutionary". O'Reilly, 1999.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linusa@google.com>
---
 trailer.c | 21 +++++++++++++++++++++
 trailer.h | 23 ++---------------------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/trailer.c b/trailer.c
index 9179dd802c6..6167b707ae0 100644
--- a/trailer.c
+++ b/trailer.c
@@ -11,6 +11,27 @@
  * Copyright (c) 2013, 2014 Christian Couder <chriscool@tuxfamily.org>
  */
 
+struct trailer_info {
+	/*
+	 * True if there is a blank line before the location pointed to by
+	 * trailer_block_start.
+	 */
+	int blank_line_before_trailer;
+
+	/*
+	 * Offsets to the trailer block start and end positions in the input
+	 * string. If no trailer block is found, these are both set to the
+	 * "true" end of the input (find_end_of_log_message()).
+	 */
+	size_t trailer_block_start, trailer_block_end;
+
+	/*
+	 * Array of trailers found.
+	 */
+	char **trailers;
+	size_t trailer_nr;
+};
+
 struct conf_info {
 	char *name;
 	char *key;
diff --git a/trailer.h b/trailer.h
index b32213a9e23..a63e97a2663 100644
--- a/trailer.h
+++ b/trailer.h
@@ -4,6 +4,8 @@
 #include "list.h"
 #include "strbuf.h"
 
+struct trailer_info;
+
 enum trailer_where {
 	WHERE_DEFAULT,
 	WHERE_END,
@@ -29,27 +31,6 @@ int trailer_set_where(enum trailer_where *item, const char *value);
 int trailer_set_if_exists(enum trailer_if_exists *item, const char *value);
 int trailer_set_if_missing(enum trailer_if_missing *item, const char *value);
 
-struct trailer_info {
-	/*
-	 * True if there is a blank line before the location pointed to by
-	 * trailer_block_start.
-	 */
-	int blank_line_before_trailer;
-
-	/*
-	 * Offsets to the trailer block start and end positions in the input
-	 * string. If no trailer block is found, these are both set to the
-	 * "true" end of the input (find_end_of_log_message()).
-	 */
-	size_t trailer_block_start, trailer_block_end;
-
-	/*
-	 * Array of trailers found.
-	 */
-	char **trailers;
-	size_t trailer_nr;
-};
-
 /*
  * A list that represents newly-added trailers, such as those provided
  * with the --trailer command line option of git-interpret-trailers.
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v3 08/10] trailer: retire trailer_info_get() from API
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
                       ` (6 preceding siblings ...)
  2024-04-26  0:26     ` [PATCH v3 07/10] trailer: make trailer_info struct private Linus Arver via GitGitGadget
@ 2024-04-26  0:26     ` Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 09/10] trailer: document parse_trailers() usage Linus Arver via GitGitGadget
                       ` (3 subsequent siblings)
  11 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Make trailer_info_get() "static" to be file-scoped to trailer.c, because
no one outside of trailer.c uses it. Remove its declaration from
<trailer.h>.

We have to also reposition it to be above parse_trailers(), which
depends on it.

Signed-off-by: Linus Arver <linusa@google.com>
---
 trailer.c | 92 +++++++++++++++++++++++++++----------------------------
 trailer.h |  2 --
 2 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/trailer.c b/trailer.c
index 6167b707ae0..33b6aa7e8bd 100644
--- a/trailer.c
+++ b/trailer.c
@@ -979,6 +979,52 @@ static struct trailer_info *trailer_info_new(void)
 	return info;
 }
 
+static struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
+					     const char *str)
+{
+	struct trailer_info *info = trailer_info_new();
+	size_t end_of_log_message = 0, trailer_block_start = 0;
+	struct strbuf **trailer_lines, **ptr;
+	char **trailer_strings = NULL;
+	size_t nr = 0, alloc = 0;
+	char **last = NULL;
+
+	trailer_config_init();
+
+	end_of_log_message = find_end_of_log_message(str, opts->no_divider);
+	trailer_block_start = find_trailer_block_start(str, end_of_log_message);
+
+	trailer_lines = strbuf_split_buf(str + trailer_block_start,
+					 end_of_log_message - trailer_block_start,
+					 '\n',
+					 0);
+	for (ptr = trailer_lines; *ptr; ptr++) {
+		if (last && isspace((*ptr)->buf[0])) {
+			struct strbuf sb = STRBUF_INIT;
+			strbuf_attach(&sb, *last, strlen(*last), strlen(*last));
+			strbuf_addbuf(&sb, *ptr);
+			*last = strbuf_detach(&sb, NULL);
+			continue;
+		}
+		ALLOC_GROW(trailer_strings, nr + 1, alloc);
+		trailer_strings[nr] = strbuf_detach(*ptr, NULL);
+		last = find_separator(trailer_strings[nr], separators) >= 1
+			? &trailer_strings[nr]
+			: NULL;
+		nr++;
+	}
+	strbuf_list_free(trailer_lines);
+
+	info->blank_line_before_trailer = ends_with_blank_line(str,
+							       trailer_block_start);
+	info->trailer_block_start = trailer_block_start;
+	info->trailer_block_end = end_of_log_message;
+	info->trailers = trailer_strings;
+	info->trailer_nr = nr;
+
+	return info;
+}
+
 /*
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
@@ -1044,52 +1090,6 @@ int blank_line_before_trailer_block(struct trailer_info *info)
 	return info->blank_line_before_trailer;
 }
 
-struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
-				      const char *str)
-{
-	struct trailer_info *info = trailer_info_new();
-	size_t end_of_log_message = 0, trailer_block_start = 0;
-	struct strbuf **trailer_lines, **ptr;
-	char **trailer_strings = NULL;
-	size_t nr = 0, alloc = 0;
-	char **last = NULL;
-
-	trailer_config_init();
-
-	end_of_log_message = find_end_of_log_message(str, opts->no_divider);
-	trailer_block_start = find_trailer_block_start(str, end_of_log_message);
-
-	trailer_lines = strbuf_split_buf(str + trailer_block_start,
-					 end_of_log_message - trailer_block_start,
-					 '\n',
-					 0);
-	for (ptr = trailer_lines; *ptr; ptr++) {
-		if (last && isspace((*ptr)->buf[0])) {
-			struct strbuf sb = STRBUF_INIT;
-			strbuf_attach(&sb, *last, strlen(*last), strlen(*last));
-			strbuf_addbuf(&sb, *ptr);
-			*last = strbuf_detach(&sb, NULL);
-			continue;
-		}
-		ALLOC_GROW(trailer_strings, nr + 1, alloc);
-		trailer_strings[nr] = strbuf_detach(*ptr, NULL);
-		last = find_separator(trailer_strings[nr], separators) >= 1
-			? &trailer_strings[nr]
-			: NULL;
-		nr++;
-	}
-	strbuf_list_free(trailer_lines);
-
-	info->blank_line_before_trailer = ends_with_blank_line(str,
-							       trailer_block_start);
-	info->trailer_block_start = trailer_block_start;
-	info->trailer_block_end = end_of_log_message;
-	info->trailers = trailer_strings;
-	info->trailer_nr = nr;
-
-	return info;
-}
-
 void trailer_info_release(struct trailer_info *info)
 {
 	size_t i;
diff --git a/trailer.h b/trailer.h
index a63e97a2663..1b7422fa2b0 100644
--- a/trailer.h
+++ b/trailer.h
@@ -73,8 +73,6 @@ void process_trailers_lists(struct list_head *head,
 struct trailer_info *parse_trailers(const struct process_trailer_options *,
 				    const char *str,
 				    struct list_head *head);
-struct trailer_info *trailer_info_get(const struct process_trailer_options *,
-				      const char *str);
 
 size_t trailer_block_start(struct trailer_info *);
 size_t trailer_block_end(struct trailer_info *);
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v3 09/10] trailer: document parse_trailers() usage
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
                       ` (7 preceding siblings ...)
  2024-04-26  0:26     ` [PATCH v3 08/10] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
@ 2024-04-26  0:26     ` Linus Arver via GitGitGadget
  2024-04-26  0:26     ` [PATCH v3 10/10] trailer unit tests: inspect iterator contents Linus Arver via GitGitGadget
                       ` (2 subsequent siblings)
  11 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Explain how to use parse_trailers(), because earlier we made the
trailer_info struct opaque. That is, because clients can no longer peek
inside it, we should give them guidance about how the (pointer to the)
opaque struct can still be useful to them.

Rename the "head" parameter to "trailer_objects" to make the wording of
the new comments a bit easier to read (because "head" itself doesn't
really have any domain-specific meaning here).

Signed-off-by: Linus Arver <linusa@google.com>
---
 trailer.c |  8 ++++----
 trailer.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/trailer.c b/trailer.c
index 33b6aa7e8bd..406745264aa 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1026,12 +1026,12 @@ static struct trailer_info *trailer_info_get(const struct process_trailer_option
 }
 
 /*
- * Parse trailers in "str", populating the trailer info and "head"
+ * Parse trailers in "str", populating the trailer info and "trailer_objects"
  * linked list structure.
  */
 struct trailer_info *parse_trailers(const struct process_trailer_options *opts,
 				    const char *str,
-				    struct list_head *head)
+				    struct list_head *trailer_objects)
 {
 	struct trailer_info *info;
 	struct strbuf tok = STRBUF_INIT;
@@ -1051,13 +1051,13 @@ struct trailer_info *parse_trailers(const struct process_trailer_options *opts,
 				      separator_pos);
 			if (opts->unfold)
 				unfold_value(&val);
-			add_trailer_item(head,
+			add_trailer_item(trailer_objects,
 					 strbuf_detach(&tok, NULL),
 					 strbuf_detach(&val, NULL));
 		} else if (!opts->only_trailers) {
 			strbuf_addstr(&val, trailer);
 			strbuf_strip_suffix(&val, "\n");
-			add_trailer_item(head,
+			add_trailer_item(trailer_objects,
 					 NULL,
 					 strbuf_detach(&val, NULL));
 		}
diff --git a/trailer.h b/trailer.h
index 1b7422fa2b0..647d48aa2de 100644
--- a/trailer.h
+++ b/trailer.h
@@ -70,14 +70,63 @@ void parse_trailers_from_command_line_args(struct list_head *arg_head,
 void process_trailers_lists(struct list_head *head,
 			    struct list_head *arg_head);
 
+/*
+ * Given some input string "str", return a pointer to an opaque trailer_info
+ * structure. Also populate the trailer_objects list with parsed trailer
+ * objects. Internally this calls trailer_info_get() to get the opaque pointer,
+ * but does some extra work to populate the trailer_objects linked list.
+ *
+ * The opaque trailer_info pointer can be used to check the position of the
+ * trailer block as offsets relative to the beginning of "str" in
+ * trailer_block_start() and trailer_block_end().
+ * blank_line_before_trailer_block() returns 1 if there is a blank line just
+ * before the trailer block. All of these functions are useful for preserving
+ * the input before and after the trailer block, if we were to write out the
+ * original input (but with the trailer block itself modified); see
+ * builtin/interpret-trailers.c for an example.
+ *
+ * For iterating through the parsed trailer block (if you don't care about the
+ * position of the trailer block itself in the context of the larger string text
+ * from which it was parsed), please see trailer_iterator_init() which uses the
+ * trailer_info struct internally.
+ *
+ * Lastly, callers should call trailer_info_release() when they are done using
+ * the opaque pointer.
+ *
+ * NOTE: Callers should treat both trailer_info and trailer_objects as
+ * read-only items, because there is some overlap between the two (trailer_info
+ * has "char **trailers" string array, and trailer_objects will have the same
+ * data but as a linked list of trailer_item objects). This API does not perform
+ * any synchronization between the two. In the future we should be able to
+ * reduce the duplication and use just the linked list.
+ */
 struct trailer_info *parse_trailers(const struct process_trailer_options *,
 				    const char *str,
-				    struct list_head *head);
+				    struct list_head *trailer_objects);
 
+/*
+ * Return the offset of the start of the trailer block. That is, 0 is the start
+ * of the input ("str" in parse_trailers()) and some other positive number
+ * indicates how many bytes we have to skip over before we get to the beginning
+ * of the trailer block.
+ */
 size_t trailer_block_start(struct trailer_info *);
+
+/*
+ * Return the end of the trailer block, again relative to the start of the
+ * input.
+ */
 size_t trailer_block_end(struct trailer_info *);
+
+/*
+ * Return 1 if the trailer block had an extra newline (blank line) just before
+ * it.
+ */
 int blank_line_before_trailer_block(struct trailer_info *);
 
+/*
+ * Free trailer_info struct.
+ */
 void trailer_info_release(struct trailer_info *info);
 
 void trailer_config_init(void);
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v3 10/10] trailer unit tests: inspect iterator contents
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
                       ` (8 preceding siblings ...)
  2024-04-26  0:26     ` [PATCH v3 09/10] trailer: document parse_trailers() usage Linus Arver via GitGitGadget
@ 2024-04-26  0:26     ` Linus Arver via GitGitGadget
  2024-04-27 12:51     ` [PATCH v3 00/10] Make trailer_info struct private (plus sequencer cleanup) Christian Couder
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
  11 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-04-26  0:26 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linusa@google.com>

Previously we only checked whether we would iterate a certain (expected)
number of times.

Also check the parsed "raw", "key" and "val" fields during each
iteration.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Linus Arver <linusa@google.com>
---
 t/unit-tests/t-trailer.c | 161 +++++++++++++++++++++++++++++++++++----
 1 file changed, 148 insertions(+), 13 deletions(-)

diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
index 262e2838273..2abba913d00 100644
--- a/t/unit-tests/t-trailer.c
+++ b/t/unit-tests/t-trailer.c
@@ -1,14 +1,27 @@
 #include "test-lib.h"
 #include "trailer.h"
 
-static void t_trailer_iterator(const char *msg, size_t num_expected_objects)
+struct trailer_assertions {
+	const char *raw;
+	const char *key;
+	const char *val;
+};
+
+static void t_trailer_iterator(const char *msg, size_t num_expected_objects,
+			       struct trailer_assertions *trailer_assertions)
 {
 	struct trailer_iterator iter;
 	size_t i = 0;
 
 	trailer_iterator_init(&iter, msg);
-	while (trailer_iterator_advance(&iter))
+	while (trailer_iterator_advance(&iter)) {
+		if (num_expected_objects) {
+			check_str(iter.raw, trailer_assertions[i].raw);
+			check_str(iter.key.buf, trailer_assertions[i].key);
+			check_str(iter.val.buf, trailer_assertions[i].val);
+		}
 		i++;
+	}
 	trailer_iterator_release(&iter);
 
 	check_uint(i, ==, num_expected_objects);
@@ -16,22 +29,26 @@ static void t_trailer_iterator(const char *msg, size_t num_expected_objects)
 
 static void run_t_trailer_iterator(void)
 {
+
 	static struct test_cases {
 		const char *name;
 		const char *msg;
 		size_t num_expected_objects;
+		struct trailer_assertions trailer_assertions[10];
 	} tc[] = {
 		{
 			"empty input",
 			"",
-			0
+			0,
+			{{0}},
 		},
 		{
 			"no newline at beginning",
 			"Fixes: x\n"
 			"Acked-by: x\n"
 			"Reviewed-by: x\n",
-			0
+			0,
+			{{0}},
 		},
 		{
 			"newline at beginning",
@@ -39,7 +56,27 @@ static void run_t_trailer_iterator(void)
 			"Fixes: x\n"
 			"Acked-by: x\n"
 			"Reviewed-by: x\n",
-			3
+			3,
+			{
+				{
+					.raw = "Fixes: x\n",
+					.key = "Fixes",
+					.val = "x",
+				},
+				{
+					.raw = "Acked-by: x\n",
+					.key = "Acked-by",
+					.val = "x",
+				},
+				{
+					.raw = "Reviewed-by: x\n",
+					.key = "Reviewed-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"without body text",
@@ -48,7 +85,27 @@ static void run_t_trailer_iterator(void)
 			"Fixes: x\n"
 			"Acked-by: x\n"
 			"Reviewed-by: x\n",
-			3
+			3,
+			{
+				{
+					.raw = "Fixes: x\n",
+					.key = "Fixes",
+					.val = "x",
+				},
+				{
+					.raw = "Acked-by: x\n",
+					.key = "Acked-by",
+					.val = "x",
+				},
+				{
+					.raw = "Reviewed-by: x\n",
+					.key = "Reviewed-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"with body text, without divider",
@@ -63,7 +120,32 @@ static void run_t_trailer_iterator(void)
 			"Acked-by: x\n"
 			"Reviewed-by: x\n"
 			"Signed-off-by: x\n",
-			4
+			4,
+			{
+				{
+					.raw = "Fixes: x\n",
+					.key = "Fixes",
+					.val = "x",
+				},
+				{
+					.raw = "Acked-by: x\n",
+					.key = "Acked-by",
+					.val = "x",
+				},
+				{
+					.raw = "Reviewed-by: x\n",
+					.key = "Reviewed-by",
+					.val = "x",
+				},
+				{
+					.raw = "Signed-off-by: x\n",
+					.key = "Signed-off-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"with body text, without divider (second trailer block)",
@@ -85,7 +167,22 @@ static void run_t_trailer_iterator(void)
 			 */
 			"Helped-by: x\n"
 			"Signed-off-by: x\n",
-			2
+			2,
+			{
+				{
+					.raw = "Helped-by: x\n",
+					.key = "Helped-by",
+					.val = "x",
+				},
+				{
+					.raw = "Signed-off-by: x\n",
+					.key = "Signed-off-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"with body text, with divider",
@@ -103,7 +200,17 @@ static void run_t_trailer_iterator(void)
 			 * always ignores the divider.
 			 */
 			"Signed-off-by: x\n",
-			1
+			1,
+			{
+				{
+					.raw = "Signed-off-by: x\n",
+					.key = "Signed-off-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"with non-trailer lines in trailer block",
@@ -125,7 +232,32 @@ static void run_t_trailer_iterator(void)
 			 * because we still want to iterate through the entire
 			 * block.
 			 */
-			4
+			4,
+			{
+				{
+					.raw = "not a trailer line\n",
+					.key = "not a trailer line",
+					.val = "",
+				},
+				{
+					.raw = "not a trailer line\n",
+					.key = "not a trailer line",
+					.val = "",
+				},
+				{
+					.raw = "not a trailer line\n",
+					.key = "not a trailer line",
+					.val = "",
+				},
+				{
+					.raw = "Signed-off-by: x\n",
+					.key = "Signed-off-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"with non-trailer lines (one too many) in trailer block",
@@ -140,7 +272,8 @@ static void run_t_trailer_iterator(void)
 			"not a trailer line\n"
 			"not a trailer line\n"
 			"Signed-off-by: x\n",
-			0
+			0,
+			{{0}},
 		},
 		{
 			"with non-trailer lines (only 1) in trailer block, but no Git-generated trailers",
@@ -162,13 +295,15 @@ static void run_t_trailer_iterator(void)
 			"Acked-by: x\n"
 			"Acked-by: x\n"
 			"not a trailer line\n",
-			0
+			0,
+			{{0}},
 		},
 	};
 
 	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
 		TEST(t_trailer_iterator(tc[i].msg,
-					tc[i].num_expected_objects),
+					tc[i].num_expected_objects,
+					tc[i].trailer_assertions),
 		     "%s", tc[i].name);
 	}
 }
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* Re: [PATCH v3 02/10] trailer: add unit tests for trailer iterator
  2024-04-26  0:26     ` [PATCH v3 02/10] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
@ 2024-04-26 14:51       ` Christian Couder
  2024-04-26 16:20         ` Junio C Hamano
  2024-04-26 16:25         ` Linus Arver
  0 siblings, 2 replies; 67+ messages in thread
From: Christian Couder @ 2024-04-26 14:51 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Junio C Hamano, Emily Shaffer,
	Josh Steadmon, Randall S. Becker, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

On Fri, Apr 26, 2024 at 2:26 AM Linus Arver via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Linus Arver <linusa@google.com>
>
> Test the number of trailers found by the iterator (to be more precise,
> the parsing mechanism which the iterator just walks over) when given
> some some arbitrary log message.

s/some some/some/

> We test the iterator because it is a public interface function exposed
> by the trailer API (we generally don't want to test internal
> implementation details which are, unlike the API, subject to drastic
> changes).
>
> Signed-off-by: Linus Arver <linusa@google.com>


> +static void run_t_trailer_iterator(void)
> +{
> +       static struct test_cases {
> +               const char *name;
> +               const char *msg;
> +               size_t num_expected_trailers;
> +       } tc[] = {

...

> +       };
> +
> +       for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
> +               TEST(t_trailer_iterator(tc[i].msg,
> +                                       tc[i].num_expected_trailers),
> +                    "%s", tc[i].name);

Nit: the members of struct test_cases are used in the (msg,
num_expected_trailers, name) order, while they are declared in the
(name, msg, num_expected_trailers) order. I think it would make it a
bit easier to use in struct test_cases the same order in which they
are used in the TEST() macro.

> +       }
> +}
> +
> +int cmd_main(int argc, const char **argv)
> +{
> +       run_t_trailer_iterator();
> +       return test_done();
> +}

LGTM otherwise.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v3 02/10] trailer: add unit tests for trailer iterator
  2024-04-26 14:51       ` Christian Couder
@ 2024-04-26 16:20         ` Junio C Hamano
  2024-04-26 16:25         ` Linus Arver
  1 sibling, 0 replies; 67+ messages in thread
From: Junio C Hamano @ 2024-04-26 16:20 UTC (permalink / raw)
  To: Christian Couder
  Cc: Linus Arver via GitGitGadget, git, Christian Couder,
	Emily Shaffer, Josh Steadmon, Randall S. Becker,
	Kristoffer Haugsbakk, Linus Arver, Linus Arver

Christian Couder <christian.couder@gmail.com> writes:

> On Fri, Apr 26, 2024 at 2:26 AM Linus Arver via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>>
>> From: Linus Arver <linusa@google.com>
>>
>> Test the number of trailers found by the iterator (to be more precise,
>> the parsing mechanism which the iterator just walks over) when given
>> some some arbitrary log message.
>
> s/some some/some/

Right.

>> +static void run_t_trailer_iterator(void)
>> +{
>> +       static struct test_cases {
>> +               const char *name;
>> +               const char *msg;
>> +               size_t num_expected_trailers;
>> +       } tc[] = {
>
> ...
>
>> +       };
>> +
>> +       for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
>> +               TEST(t_trailer_iterator(tc[i].msg,
>> +                                       tc[i].num_expected_trailers),
>> +                    "%s", tc[i].name);
>
> Nit: the members of struct test_cases are used in the (msg,
> num_expected_trailers, name) order, while they are declared in the
> (name, msg, num_expected_trailers) order. I think it would make it a
> bit easier to use in struct test_cases the same order in which they
> are used in the TEST() macro.

I am not sure if I agree.  In the array of struct, being able to
identify each array item with its .name component makes quite a lot
of sense, especially when the .name member is not really part of the
data used in tests but is used as an identifier for the tuple made
of other members (i.e., <msg, num_expected_trailers> in this case).

The TEST() macro is unable to take "name" as an earlier parameter than
others due to how it wants to create the identifying string (i.e.,
doing an equivalent of strfmt() on tc[i].name), but reordering the
struct members to match the peculiar order the members are used
smells like a tail wagging a dog.

>
>> +       }
>> +}
>> +
>> +int cmd_main(int argc, const char **argv)
>> +{
>> +       run_t_trailer_iterator();
>> +       return test_done();
>> +}
>
> LGTM otherwise.

Thanks.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v3 02/10] trailer: add unit tests for trailer iterator
  2024-04-26 14:51       ` Christian Couder
  2024-04-26 16:20         ` Junio C Hamano
@ 2024-04-26 16:25         ` Linus Arver
  1 sibling, 0 replies; 67+ messages in thread
From: Linus Arver @ 2024-04-26 16:25 UTC (permalink / raw)
  To: Christian Couder, Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Junio C Hamano, Emily Shaffer,
	Josh Steadmon, Randall S. Becker, Kristoffer Haugsbakk,
	Linus Arver

Hello Christian!

Christian Couder <christian.couder@gmail.com> writes:

> On Fri, Apr 26, 2024 at 2:26 AM Linus Arver via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>>
>> From: Linus Arver <linusa@google.com>
>>
>> Test the number of trailers found by the iterator (to be more precise,
>> the parsing mechanism which the iterator just walks over) when given
>> some some arbitrary log message.
>
> s/some some/some/

Fixed locally, thanks. Will send as part of a reroll pending further
review comments.

>> We test the iterator because it is a public interface function exposed
>> by the trailer API (we generally don't want to test internal
>> implementation details which are, unlike the API, subject to drastic
>> changes).
>>
>> Signed-off-by: Linus Arver <linusa@google.com>
>
>
>> +static void run_t_trailer_iterator(void)
>> +{
>> +       static struct test_cases {
>> +               const char *name;
>> +               const char *msg;
>> +               size_t num_expected_trailers;
>> +       } tc[] = {
>
> ...
>
>> +       };
>> +
>> +       for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
>> +               TEST(t_trailer_iterator(tc[i].msg,
>> +                                       tc[i].num_expected_trailers),
>> +                    "%s", tc[i].name);
>
> Nit: the members of struct test_cases are used in the (msg,
> num_expected_trailers, name) order, while they are declared in the
> (name, msg, num_expected_trailers) order. I think it would make it a
> bit easier to use in struct test_cases the same order in which they
> are used in the TEST() macro.

This bothered me as well, but ultimately I preferred to see the test
names first in the actual test cases where each one is defined like

     {
        "name of test",
        ...
     },
     {
        "name of another test",
        ...
     }
     ...

instead of the other way around. FWIW this style comes from Golang, where
it is the standard practice. I suppose in this instance we have
test cases like

     {
             "without body text",
             "subject: foo bar\n"
             "\n"
             "Fixes: x\n"
             "Acked-by: x\n"
             "Reviewed-by: x\n",
             3
     },

and the separation between "name" vs "msg" could be a bit confusing on
first glance, but I don't think that's a big deal. Plus our
test_expect_success shell functions also expect the name as the first
parameter, so it would be consistent with that style.

It's unfortunate that we cannot put __VA_ARGS__ as the "first parameter"
to the TEST() macro, like

    TEST("%s", tc[i].name,
         t_trailer_iterator(tc[i].msg,
                            tc[i].num_expected_trailers),
        );

but I suppose that's a limitation of __VA_ARGS__. I also do wonder
whether we even need the test case name to be __VA_ARGS__ at all though
(we certainly don't *need* it here as the test case names are already
unique) --- so it might be fine to have another macro that only takes
the test name and a test function. Something like

    #define TC(name, t) ...

on top of the

    #define TEST(t, ...) ...

we already have, perhaps? IDK.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v3 03/10] trailer: teach iterator about non-trailer lines
  2024-04-26  0:26     ` [PATCH v3 03/10] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
@ 2024-04-27 12:50       ` Christian Couder
  2024-04-30  4:42         ` Linus Arver
  0 siblings, 1 reply; 67+ messages in thread
From: Christian Couder @ 2024-04-27 12:50 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Junio C Hamano, Emily Shaffer,
	Josh Steadmon, Randall S. Becker, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

(Sorry I just realized that I had sent this email to Linus only.)

On Fri, Apr 26, 2024 at 2:26 AM Linus Arver via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Linus Arver <linusa@google.com>
>
> Previously the iterator did not iterate over non-trailer lines. This was
> somewhat unfortunate, because trailer blocks could have non-trailer
> lines in them since 146245063e (trailer: allow non-trailers in trailer
> block, 2016-10-21), which was before the iterator was created in
> f0939a0eb1 (trailer: add interface for iterating over commit trailers,
> 2020-09-27).
>
> So if trailer API users wanted to iterate over all lines in a trailer
> block (including non-trailer lines), they could not use the iterator and
> were forced to use the lower-level trailer_info struct directly (which
> provides a raw string array that includes all lines in the trailer
> block).
>
> Change the iterator's behavior so that we also iterate over non-trailer
> lines, instead of skipping over them. The new "raw" member of the
> iterator allows API users to access previously inaccessible non-trailer
> lines. Reword the variable "trailer" to just "line" because this
> variable can now hold both trailer lines _and_ non-trailer lines.
>
> The new "raw" member is important because anyone currently not using the
> iterator is using trailer_info's raw string array directly to access
> lines to check what the combined key + value looks like. If we didn't
> provide a "raw" member here, iterator users would have to re-construct
> the unparsed line by concatenating the key and value back together again
> --- which places an undue burden for iterator users.
>
> The next commit demonstrates the use of the iterator in sequencer.c as an
> example of where "raw" will be useful, so that it can start using the
> iterator.
>
> For the existing use of the iterator in builtin/shortlog.c, we don't
> have to change the code there because that code does
>
>     trailer_iterator_init(&iter, body);
>     while (trailer_iterator_advance(&iter)) {
>         const char *value = iter.val.buf;
>
>         if (!string_list_has_string(&log->trailers, iter.key.buf))
>             continue;
>
>         ...
>
> and the
>
>         if (!string_list_has_string(&log->trailers, iter.key.buf))
>
> condition already skips over non-trailer lines (iter.key.buf is empty
> for non-trailer lines, making the comparison still work even with this
> commit).
>
> Rename "num_expected_trailers" to "num_expected_objects" in
> t/unit-tests/t-trailer.c because the items we iterate over now include
> non-trailer lines.

I think it would be simpler if the previous patch used just
"num_expected" or "expected". It's not like the other fields in the
struct ("msg" and "name") are very explicit, so why this one only?

> Signed-off-by: Linus Arver <linusa@google.com>


> diff --git a/trailer.c b/trailer.c
> index 3e4dab9c065..4700c441442 100644
> --- a/trailer.c
> +++ b/trailer.c
> @@ -1146,17 +1146,15 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
>
>  int trailer_iterator_advance(struct trailer_iterator *iter)
>  {
> -       while (iter->internal.cur < iter->internal.info.trailer_nr) {
> -               char *trailer = iter->internal.info.trailers[iter->internal.cur++];
> -               int separator_pos = find_separator(trailer, separators);
> -
> -               if (separator_pos < 1)
> -                       continue; /* not a real trailer */
> +       if (iter->internal.cur < iter->internal.info.trailer_nr) {
> +               char *line = iter->internal.info.trailers[iter->internal.cur++];
> +               int separator_pos = find_separator(line, separators);
>
> +               iter->raw = line;
>                 strbuf_reset(&iter->key);
>                 strbuf_reset(&iter->val);
>                 parse_trailer(&iter->key, &iter->val, NULL,
> -                             trailer, separator_pos);
> +                             line, separator_pos);
>                 /* Always unfold values during iteration. */
>                 unfold_value(&iter->val);
>                 return 1;
> diff --git a/trailer.h b/trailer.h
> index 9f42aa75994..ebafa3657e4 100644
> --- a/trailer.h
> +++ b/trailer.h
> @@ -125,6 +125,14 @@ void format_trailers_from_commit(const struct process_trailer_options *,
>   *   trailer_iterator_release(&iter);
>   */
>  struct trailer_iterator {
> +       /*
> +        * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
> +        * key/val pair as part of a trailer block. A trailer block can be
> +        * either 100% trailer lines, or mixed in with non-trailer lines (in
> +        * which case at least 25% must be trailer lines).

I don't think 25% is important here. What is more important is to just
say that this field could not be an actual trailer, and to tell what
the 'key' and 'val' fields below will contain then.


> +        */
> +       const char *raw;
> +
>         struct strbuf key;
>         struct strbuf val;

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v3 00/10] Make trailer_info struct private (plus sequencer cleanup)
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
                       ` (9 preceding siblings ...)
  2024-04-26  0:26     ` [PATCH v3 10/10] trailer unit tests: inspect iterator contents Linus Arver via GitGitGadget
@ 2024-04-27 12:51     ` Christian Couder
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
  11 siblings, 0 replies; 67+ messages in thread
From: Christian Couder @ 2024-04-27 12:51 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Junio C Hamano, Emily Shaffer,
	Josh Steadmon, Randall S. Becker, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

(I had sent this email only to Linus too, sorry.)

On Fri, Apr 26, 2024 at 2:26 AM Linus Arver via GitGitGadget
<gitgitgadget@gmail.com> wrote:

> Linus Arver (10):
>   Makefile: sort UNIT_TEST_PROGRAMS
>   trailer: add unit tests for trailer iterator
>   trailer: teach iterator about non-trailer lines
>   sequencer: use the trailer iterator
>   interpret-trailers: access trailer_info with new helpers
>   trailer: make parse_trailers() return trailer_info pointer
>   trailer: make trailer_info struct private
>   trailer: retire trailer_info_get() from API
>   trailer: document parse_trailers() usage
>   trailer unit tests: inspect iterator contents

I took a look at the whole series and had only a few minor comments on
some patches.

Thanks!

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v3 03/10] trailer: teach iterator about non-trailer lines
  2024-04-27 12:50       ` Christian Couder
@ 2024-04-30  4:42         ` Linus Arver
  2024-04-30  4:55           ` Linus Arver
  0 siblings, 1 reply; 67+ messages in thread
From: Linus Arver @ 2024-04-30  4:42 UTC (permalink / raw)
  To: Christian Couder, Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Junio C Hamano, Emily Shaffer,
	Josh Steadmon, Randall S. Becker, Kristoffer Haugsbakk,
	Linus Arver

Christian Couder <christian.couder@gmail.com> writes:

> (Sorry I just realized that I had sent this email to Linus only.)
>
> On Fri, Apr 26, 2024 at 2:26 AM Linus Arver via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>>
>> From: Linus Arver <linusa@google.com>
>>
>> Previously the iterator did not iterate over non-trailer lines. This was
>> somewhat unfortunate, because trailer blocks could have non-trailer
>> lines in them since 146245063e (trailer: allow non-trailers in trailer
>> block, 2016-10-21), which was before the iterator was created in
>> f0939a0eb1 (trailer: add interface for iterating over commit trailers,
>> 2020-09-27).
>>
>> So if trailer API users wanted to iterate over all lines in a trailer
>> block (including non-trailer lines), they could not use the iterator and
>> were forced to use the lower-level trailer_info struct directly (which
>> provides a raw string array that includes all lines in the trailer
>> block).
>>
>> Change the iterator's behavior so that we also iterate over non-trailer
>> lines, instead of skipping over them. The new "raw" member of the
>> iterator allows API users to access previously inaccessible non-trailer
>> lines. Reword the variable "trailer" to just "line" because this
>> variable can now hold both trailer lines _and_ non-trailer lines.
>>
>> The new "raw" member is important because anyone currently not using the
>> iterator is using trailer_info's raw string array directly to access
>> lines to check what the combined key + value looks like. If we didn't
>> provide a "raw" member here, iterator users would have to re-construct
>> the unparsed line by concatenating the key and value back together again
>> --- which places an undue burden for iterator users.
>>
>> The next commit demonstrates the use of the iterator in sequencer.c as an
>> example of where "raw" will be useful, so that it can start using the
>> iterator.
>>
>> For the existing use of the iterator in builtin/shortlog.c, we don't
>> have to change the code there because that code does
>>
>>     trailer_iterator_init(&iter, body);
>>     while (trailer_iterator_advance(&iter)) {
>>         const char *value = iter.val.buf;
>>
>>         if (!string_list_has_string(&log->trailers, iter.key.buf))
>>             continue;
>>
>>         ...
>>
>> and the
>>
>>         if (!string_list_has_string(&log->trailers, iter.key.buf))
>>
>> condition already skips over non-trailer lines (iter.key.buf is empty
>> for non-trailer lines, making the comparison still work even with this
>> commit).
>>
>> Rename "num_expected_trailers" to "num_expected_objects" in
>> t/unit-tests/t-trailer.c because the items we iterate over now include
>> non-trailer lines.
>
> I think it would be simpler if the previous patch used just
> "num_expected" or "expected". It's not like the other fields in the
> struct ("msg" and "name") are very explicit, so why this one only?

I didn't give it much thought TBH. "num_expected" SGTM. Will update.

>> Signed-off-by: Linus Arver <linusa@google.com>
>
>
>> diff --git a/trailer.c b/trailer.c
>> index 3e4dab9c065..4700c441442 100644
>> --- a/trailer.c
>> +++ b/trailer.c
>> @@ -1146,17 +1146,15 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
>>
>>  int trailer_iterator_advance(struct trailer_iterator *iter)
>>  {
>> -       while (iter->internal.cur < iter->internal.info.trailer_nr) {
>> -               char *trailer = iter->internal.info.trailers[iter->internal.cur++];
>> -               int separator_pos = find_separator(trailer, separators);
>> -
>> -               if (separator_pos < 1)
>> -                       continue; /* not a real trailer */
>> +       if (iter->internal.cur < iter->internal.info.trailer_nr) {
>> +               char *line = iter->internal.info.trailers[iter->internal.cur++];
>> +               int separator_pos = find_separator(line, separators);
>>
>> +               iter->raw = line;
>>                 strbuf_reset(&iter->key);
>>                 strbuf_reset(&iter->val);
>>                 parse_trailer(&iter->key, &iter->val, NULL,
>> -                             trailer, separator_pos);
>> +                             line, separator_pos);
>>                 /* Always unfold values during iteration. */
>>                 unfold_value(&iter->val);
>>                 return 1;
>> diff --git a/trailer.h b/trailer.h
>> index 9f42aa75994..ebafa3657e4 100644
>> --- a/trailer.h
>> +++ b/trailer.h
>> @@ -125,6 +125,14 @@ void format_trailers_from_commit(const struct process_trailer_options *,
>>   *   trailer_iterator_release(&iter);
>>   */
>>  struct trailer_iterator {
>> +       /*
>> +        * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
>> +        * key/val pair as part of a trailer block. A trailer block can be
>> +        * either 100% trailer lines, or mixed in with non-trailer lines (in
>> +        * which case at least 25% must be trailer lines).
>
> I don't think 25% is important here.

SG, will remove 25% language (FWIW we already have such language in
trailer.c if devs want to take a closer look, so it's not like
we're losing any info overall).

> What is more important is to just
> say that this field could not be an actual trailer, and to tell what
> the 'key' and 'val' fields below will contain then.

Will update.

>
>> +        */
>> +       const char *raw;
>> +
>>         struct strbuf key;
>>         struct strbuf val;


BTW I will be on vacation for the next several weeks. However as the
suggested changes are minor, I think I can still get to them and push up
a v4 sometime this week. Cheers.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v3 03/10] trailer: teach iterator about non-trailer lines
  2024-04-30  4:42         ` Linus Arver
@ 2024-04-30  4:55           ` Linus Arver
  0 siblings, 0 replies; 67+ messages in thread
From: Linus Arver @ 2024-04-30  4:55 UTC (permalink / raw)
  To: Christian Couder, Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Junio C Hamano, Emily Shaffer,
	Josh Steadmon, Randall S. Becker, Kristoffer Haugsbakk

Linus Arver <linus@ucla.edu> writes:

> Christian Couder <christian.couder@gmail.com> writes:
>
>> (Sorry I just realized that I had sent this email to Linus only.)
>>
>> On Fri, Apr 26, 2024 at 2:26 AM Linus Arver via GitGitGadget
>> <gitgitgadget@gmail.com> wrote:
>>>
>>> From: Linus Arver <linusa@google.com>
>>>
>>> Previously the iterator did not iterate over non-trailer lines. This was
>>> somewhat unfortunate, because trailer blocks could have non-trailer
>>> lines in them since 146245063e (trailer: allow non-trailers in trailer
>>> block, 2016-10-21), which was before the iterator was created in
>>> f0939a0eb1 (trailer: add interface for iterating over commit trailers,
>>> 2020-09-27).
>>>
>>> So if trailer API users wanted to iterate over all lines in a trailer
>>> block (including non-trailer lines), they could not use the iterator and
>>> were forced to use the lower-level trailer_info struct directly (which
>>> provides a raw string array that includes all lines in the trailer
>>> block).
>>>
>>> Change the iterator's behavior so that we also iterate over non-trailer
>>> lines, instead of skipping over them. The new "raw" member of the
>>> iterator allows API users to access previously inaccessible non-trailer
>>> lines. Reword the variable "trailer" to just "line" because this
>>> variable can now hold both trailer lines _and_ non-trailer lines.
>>>
>>> The new "raw" member is important because anyone currently not using the
>>> iterator is using trailer_info's raw string array directly to access
>>> lines to check what the combined key + value looks like. If we didn't
>>> provide a "raw" member here, iterator users would have to re-construct
>>> the unparsed line by concatenating the key and value back together again
>>> --- which places an undue burden for iterator users.
>>>
>>> The next commit demonstrates the use of the iterator in sequencer.c as an
>>> example of where "raw" will be useful, so that it can start using the
>>> iterator.
>>>
>>> For the existing use of the iterator in builtin/shortlog.c, we don't
>>> have to change the code there because that code does
>>>
>>>     trailer_iterator_init(&iter, body);
>>>     while (trailer_iterator_advance(&iter)) {
>>>         const char *value = iter.val.buf;
>>>
>>>         if (!string_list_has_string(&log->trailers, iter.key.buf))
>>>             continue;
>>>
>>>         ...
>>>
>>> and the
>>>
>>>         if (!string_list_has_string(&log->trailers, iter.key.buf))
>>>
>>> condition already skips over non-trailer lines (iter.key.buf is empty
>>> for non-trailer lines, making the comparison still work even with this
>>> commit).
>>>
>>> Rename "num_expected_trailers" to "num_expected_objects" in
>>> t/unit-tests/t-trailer.c because the items we iterate over now include
>>> non-trailer lines.
>>
>> I think it would be simpler if the previous patch used just
>> "num_expected" or "expected". It's not like the other fields in the
>> struct ("msg" and "name") are very explicit, so why this one only?
>
> I didn't give it much thought TBH. "num_expected" SGTM. Will update.

Another thing: I will rename "trailer_assertions" in Patch 10 to probably
"trailer_contents" because it sounds simpler. I am replying here instead
of on Patch 10 because my mail setup still has some rough edges for the
transition away from @google.com (I no longer work there).

And on that note, I'll have to update the SOB lines to match my new
email address.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* [PATCH v4 00/10] Make trailer_info struct private (plus sequencer cleanup)
  2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
                       ` (10 preceding siblings ...)
  2024-04-27 12:51     ` [PATCH v3 00/10] Make trailer_info struct private (plus sequencer cleanup) Christian Couder
@ 2024-05-02  4:54     ` Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 01/10] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
                         ` (10 more replies)
  11 siblings, 11 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver

NOTE: This series is based on the la/format-trailer-info topic branch (see
its discussion at [1]).

This series is based on the initial series [2], notably the v4 version of
patches 17-20 as suggested by Christian [3]. This version addresses the
review comments for those patches, namely the splitting up of Patch 19 there
into 3 separate patches [4] (as Patches 05-07 here).

The central idea is to make the trailer_info struct private (that is, move
its definition from trailer.h to trailer.c) --- aka the "pimpl" idiom. See
the detailed commit message for Patch 07 for the motivation behind the
change.

Patch 04 makes sequencer.c a well-behaved trailer API consumer, by making
use of the trailer iterator. Patch 03 prepares us for Patch 04. Patch 08
slightly reduces the weight of the API by removing (from the API surface) an
unused function.


Notable changes in v4
=====================

 * Drop "25%" language in Patch 03
 * Rename some variables
 * Update patch emails to personal (linus@ucla.edu) email


Notable changes in v3
=====================

 * (NEW Patch 10) Expand test coverage to check the contents of each
   iteration (raw, key, val fields), not just the total number of iterations
 * (NEW Patch 09) Add documentation in <trailer.h> for using
   parse_trailers()
 * (unrelated) I will lose access to my linusa@google.com email address
   tomorrow (I'm switching jobs!) and so future emails from me will come
   from linus@ucla.edu [5]. I've added the latter email to the CC list here
   so things should just work. Cheers


Notable changes in v2
=====================

 * Add unit tests at the beginning of the series (Patches 01 and 02) and use
   them to verify that the other edge cases remain unchanged when we add the
   "raw" member (Patch 03)

[1]
https://lore.kernel.org/git/pull.1694.git.1710485706.gitgitgadget@gmail.com/
[2]
https://lore.kernel.org/git/pull.1632.v4.git.1707196348.gitgitgadget@gmail.com/
[3]
https://lore.kernel.org/git/CAP8UFD08F0V13X0+CJ1uhMPzPWVMs2okGVMJch0DkQg5M3BWLA@mail.gmail.com/
[4]
https://lore.kernel.org/git/CAP8UFD1twELGKvvesxgCrZrypKZpgSt04ira3mvurG1UbpDfxQ@mail.gmail.com/
[5]
https://lore.kernel.org/git/pull.1720.git.1713309711217.gitgitgadget@gmail.com/

Linus Arver (10):
  Makefile: sort UNIT_TEST_PROGRAMS
  trailer: add unit tests for trailer iterator
  trailer: teach iterator about non-trailer lines
  sequencer: use the trailer iterator
  interpret-trailers: access trailer_info with new helpers
  trailer: make parse_trailers() return trailer_info pointer
  trailer: make trailer_info struct private
  trailer: retire trailer_info_get() from API
  trailer: document parse_trailers() usage
  trailer unit tests: inspect iterator contents

 Makefile                     |   5 +-
 builtin/interpret-trailers.c |  12 +-
 sequencer.c                  |  27 ++-
 t/unit-tests/t-trailer.c     | 315 +++++++++++++++++++++++++++++++++++
 trailer.c                    | 167 ++++++++++++-------
 trailer.h                    |  94 +++++++----
 6 files changed, 506 insertions(+), 114 deletions(-)
 create mode 100644 t/unit-tests/t-trailer.c


base-commit: 3452d173241c8b87ecdd67f91f594cb14327e394
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1696%2Flistx%2Ftrailer-api-part-3-v4
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1696/listx/trailer-api-part-3-v4
Pull-Request: https://github.com/gitgitgadget/git/pull/1696

Range-diff vs v3:

  1:  b6a1304f8ae !  1:  8a9f71442d8 Makefile: sort UNIT_TEST_PROGRAMS
     @@
       ## Metadata ##
     -Author: Linus Arver <linusa@google.com>
     +Author: Linus Arver <linus@ucla.edu>
      
       ## Commit message ##
          Makefile: sort UNIT_TEST_PROGRAMS
      
     -    Signed-off-by: Linus Arver <linusa@google.com>
     +    Signed-off-by: Linus Arver <linus@ucla.edu>
      
       ## Makefile ##
      @@ Makefile: THIRD_PARTY_SOURCES += sha1collisiondetection/%
  2:  4ad0fbbb33c !  2:  b503b539c6f trailer: add unit tests for trailer iterator
     @@
       ## Metadata ##
     -Author: Linus Arver <linusa@google.com>
     +Author: Linus Arver <linus@ucla.edu>
      
       ## Commit message ##
          trailer: add unit tests for trailer iterator
     @@ Commit message
          implementation details which are, unlike the API, subject to drastic
          changes).
      
     -    Signed-off-by: Linus Arver <linusa@google.com>
     +    Signed-off-by: Linus Arver <linus@ucla.edu>
      
       ## Makefile ##
      @@ Makefile: UNIT_TEST_PROGRAMS += t-ctype
  3:  9077d5a315d !  3:  4aeb48050b1 trailer: teach iterator about non-trailer lines
     @@
       ## Metadata ##
     -Author: Linus Arver <linusa@google.com>
     +Author: Linus Arver <linus@ucla.edu>
      
       ## Commit message ##
          trailer: teach iterator about non-trailer lines
     @@ Commit message
          for non-trailer lines, making the comparison still work even with this
          commit).
      
     -    Rename "num_expected_trailers" to "num_expected_objects" in
     +    Rename "num_expected_trailers" to "num_expected" in
          t/unit-tests/t-trailer.c because the items we iterate over now include
          non-trailer lines.
      
     -    Signed-off-by: Linus Arver <linusa@google.com>
     +    Signed-off-by: Linus Arver <linus@ucla.edu>
      
       ## t/unit-tests/t-trailer.c ##
      @@
     @@ t/unit-tests/t-trailer.c
       #include "trailer.h"
       
      -static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
     -+static void t_trailer_iterator(const char *msg, size_t num_expected_objects)
     ++static void t_trailer_iterator(const char *msg, size_t num_expected)
       {
       	struct trailer_iterator iter;
       	size_t i = 0;
     @@ t/unit-tests/t-trailer.c: static void t_trailer_iterator(const char *msg, size_t
       	trailer_iterator_release(&iter);
       
      -	check_uint(i, ==, num_expected_trailers);
     -+	check_uint(i, ==, num_expected_objects);
     ++	check_uint(i, ==, num_expected);
       }
       
       static void run_t_trailer_iterator(void)
     @@ t/unit-tests/t-trailer.c: static void run_t_trailer_iterator(void)
       		const char *name;
       		const char *msg;
      -		size_t num_expected_trailers;
     -+		size_t num_expected_objects;
     ++		size_t num_expected;
       	} tc[] = {
       		{
       			"empty input",
     @@ t/unit-tests/t-trailer.c: static void run_t_trailer_iterator(void)
       	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
       		TEST(t_trailer_iterator(tc[i].msg,
      -					tc[i].num_expected_trailers),
     -+					tc[i].num_expected_objects),
     ++					tc[i].num_expected),
       		     "%s", tc[i].name);
       	}
       }
     @@ trailer.h: void format_trailers_from_commit(const struct process_trailer_options
       struct trailer_iterator {
      +	/*
      +	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
     -+	 * key/val pair as part of a trailer block. A trailer block can be
     -+	 * either 100% trailer lines, or mixed in with non-trailer lines (in
     -+	 * which case at least 25% must be trailer lines).
     ++	 * key/val pair as part of a trailer block (as the "key" and "val"
     ++	 * fields below). If a line fails to parse as a trailer, then the "key"
     ++	 * will be the entire line and "val" will be the empty string.
      +	 */
      +	const char *raw;
     -+
       	struct strbuf key;
       	struct strbuf val;
       
  4:  4a1d18da574 !  4:  a3d080d4d6c sequencer: use the trailer iterator
     @@
       ## Metadata ##
     -Author: Linus Arver <linusa@google.com>
     +Author: Linus Arver <linus@ucla.edu>
      
       ## Commit message ##
          sequencer: use the trailer iterator
     @@ Commit message
          before when we iterated over the unparsed string array (char **trailers)
          in trailer_info.
      
     -    Signed-off-by: Linus Arver <linusa@google.com>
     +    Signed-off-by: Linus Arver <linus@ucla.edu>
      
       ## sequencer.c ##
      @@ sequencer.c: static const char *get_todo_path(const struct replay_opts *opts)
  5:  460979ba964 !  5:  44df42ca503 interpret-trailers: access trailer_info with new helpers
     @@
       ## Metadata ##
     -Author: Linus Arver <linusa@google.com>
     +Author: Linus Arver <linus@ucla.edu>
      
       ## Commit message ##
          interpret-trailers: access trailer_info with new helpers
     @@ Commit message
          implementation (and thus hidden from the API).
      
          Helped-by: Christian Couder <chriscool@tuxfamily.org>
     -    Signed-off-by: Linus Arver <linusa@google.com>
     +    Signed-off-by: Linus Arver <linus@ucla.edu>
      
       ## builtin/interpret-trailers.c ##
      @@ builtin/interpret-trailers.c: static void interpret_trailers(const struct process_trailer_options *opts,
  6:  d217858c637 !  6:  9ed7cef9d29 trailer: make parse_trailers() return trailer_info pointer
     @@
       ## Metadata ##
     -Author: Linus Arver <linusa@google.com>
     +Author: Linus Arver <linus@ucla.edu>
      
       ## Commit message ##
          trailer: make parse_trailers() return trailer_info pointer
     @@ Commit message
          format_trailers_from_commit() and trailer_iterator_init() accordingly.
      
          Helped-by: Christian Couder <chriscool@tuxfamily.org>
     -    Signed-off-by: Linus Arver <linusa@google.com>
     +    Signed-off-by: Linus Arver <linus@ucla.edu>
      
       ## builtin/interpret-trailers.c ##
      @@ builtin/interpret-trailers.c: static void interpret_trailers(const struct process_trailer_options *opts,
  7:  49c66c48cc1 !  7:  246ac9a5d07 trailer: make trailer_info struct private
     @@
       ## Metadata ##
     -Author: Linus Arver <linusa@google.com>
     +Author: Linus Arver <linus@ucla.edu>
      
       ## Commit message ##
          trailer: make trailer_info struct private
     @@ Commit message
      
          Helped-by: Junio C Hamano <gitster@pobox.com>
          Helped-by: Christian Couder <chriscool@tuxfamily.org>
     -    Signed-off-by: Linus Arver <linusa@google.com>
     +    Signed-off-by: Linus Arver <linus@ucla.edu>
      
       ## trailer.c ##
      @@
  8:  56e1cca4b7b !  8:  ca6f0c4208c trailer: retire trailer_info_get() from API
     @@
       ## Metadata ##
     -Author: Linus Arver <linusa@google.com>
     +Author: Linus Arver <linus@ucla.edu>
      
       ## Commit message ##
          trailer: retire trailer_info_get() from API
     @@ Commit message
          We have to also reposition it to be above parse_trailers(), which
          depends on it.
      
     -    Signed-off-by: Linus Arver <linusa@google.com>
     +    Signed-off-by: Linus Arver <linus@ucla.edu>
      
       ## trailer.c ##
      @@ trailer.c: static struct trailer_info *trailer_info_new(void)
  9:  35304837e08 !  9:  c1a0f1bed04 trailer: document parse_trailers() usage
     @@
       ## Metadata ##
     -Author: Linus Arver <linusa@google.com>
     +Author: Linus Arver <linus@ucla.edu>
      
       ## Commit message ##
          trailer: document parse_trailers() usage
     @@ Commit message
          comments a bit easier to read (because "head" itself doesn't really have
          any domain-specific meaning here).
      
     -    Signed-off-by: Linus Arver <linusa@google.com>
     +    Signed-off-by: Linus Arver <linus@ucla.edu>
      
       ## trailer.c ##
      @@ trailer.c: static struct trailer_info *trailer_info_get(const struct process_trailer_option
 10:  4d53707f836 ! 10:  310b632ddfd trailer unit tests: inspect iterator contents
     @@
       ## Metadata ##
     -Author: Linus Arver <linusa@google.com>
     +Author: Linus Arver <linus@ucla.edu>
      
       ## Commit message ##
          trailer unit tests: inspect iterator contents
     @@ Commit message
          iteration.
      
          Helped-by: Junio C Hamano <gitster@pobox.com>
     -    Signed-off-by: Linus Arver <linusa@google.com>
     +    Signed-off-by: Linus Arver <linus@ucla.edu>
      
       ## t/unit-tests/t-trailer.c ##
      @@
       #include "test-lib.h"
       #include "trailer.h"
       
     --static void t_trailer_iterator(const char *msg, size_t num_expected_objects)
     -+struct trailer_assertions {
     +-static void t_trailer_iterator(const char *msg, size_t num_expected)
     ++struct contents {
      +	const char *raw;
      +	const char *key;
      +	const char *val;
      +};
      +
     -+static void t_trailer_iterator(const char *msg, size_t num_expected_objects,
     -+			       struct trailer_assertions *trailer_assertions)
     ++static void t_trailer_iterator(const char *msg, size_t num_expected,
     ++			       struct contents *contents)
       {
       	struct trailer_iterator iter;
       	size_t i = 0;
     @@ t/unit-tests/t-trailer.c
       	trailer_iterator_init(&iter, msg);
      -	while (trailer_iterator_advance(&iter))
      +	while (trailer_iterator_advance(&iter)) {
     -+		if (num_expected_objects) {
     -+			check_str(iter.raw, trailer_assertions[i].raw);
     -+			check_str(iter.key.buf, trailer_assertions[i].key);
     -+			check_str(iter.val.buf, trailer_assertions[i].val);
     ++		if (num_expected) {
     ++			check_str(iter.raw, contents[i].raw);
     ++			check_str(iter.key.buf, contents[i].key);
     ++			check_str(iter.val.buf, contents[i].val);
      +		}
       		i++;
      +	}
       	trailer_iterator_release(&iter);
       
     - 	check_uint(i, ==, num_expected_objects);
     -@@ t/unit-tests/t-trailer.c: static void t_trailer_iterator(const char *msg, size_t num_expected_objects)
     + 	check_uint(i, ==, num_expected);
     +@@ t/unit-tests/t-trailer.c: static void t_trailer_iterator(const char *msg, size_t num_expected)
       
       static void run_t_trailer_iterator(void)
       {
     @@ t/unit-tests/t-trailer.c: static void t_trailer_iterator(const char *msg, size_t
       	static struct test_cases {
       		const char *name;
       		const char *msg;
     - 		size_t num_expected_objects;
     -+		struct trailer_assertions trailer_assertions[10];
     + 		size_t num_expected;
     ++		struct contents contents[10];
       	} tc[] = {
       		{
       			"empty input",
     @@ t/unit-tests/t-trailer.c: static void run_t_trailer_iterator(void)
       
       	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
       		TEST(t_trailer_iterator(tc[i].msg,
     --					tc[i].num_expected_objects),
     -+					tc[i].num_expected_objects,
     -+					tc[i].trailer_assertions),
     +-					tc[i].num_expected),
     ++					tc[i].num_expected,
     ++					tc[i].contents),
       		     "%s", tc[i].name);
       	}
       }

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 67+ messages in thread

* [PATCH v4 01/10] Makefile: sort UNIT_TEST_PROGRAMS
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
@ 2024-05-02  4:54       ` Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 02/10] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
                         ` (9 subsequent siblings)
  10 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linus@ucla.edu>

Signed-off-by: Linus Arver <linus@ucla.edu>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 4e255c81f22..d3a3f16f076 100644
--- a/Makefile
+++ b/Makefile
@@ -1343,10 +1343,10 @@ THIRD_PARTY_SOURCES += sha1collisiondetection/%
 THIRD_PARTY_SOURCES += sha1dc/%
 
 UNIT_TEST_PROGRAMS += t-basic
-UNIT_TEST_PROGRAMS += t-mem-pool
-UNIT_TEST_PROGRAMS += t-strbuf
 UNIT_TEST_PROGRAMS += t-ctype
+UNIT_TEST_PROGRAMS += t-mem-pool
 UNIT_TEST_PROGRAMS += t-prio-queue
+UNIT_TEST_PROGRAMS += t-strbuf
 UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v4 02/10] trailer: add unit tests for trailer iterator
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 01/10] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
@ 2024-05-02  4:54       ` Linus Arver via GitGitGadget
  2024-05-02 16:54         ` Junio C Hamano
  2024-05-02  4:54       ` [PATCH v4 03/10] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
                         ` (8 subsequent siblings)
  10 siblings, 1 reply; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linus@ucla.edu>

Test the number of trailers found by the iterator (to be more precise,
the parsing mechanism which the iterator just walks over) when given
some arbitrary log message.

We test the iterator because it is a public interface function exposed
by the trailer API (we generally don't want to test internal
implementation details which are, unlike the API, subject to drastic
changes).

Signed-off-by: Linus Arver <linus@ucla.edu>
---
 Makefile                 |   1 +
 t/unit-tests/t-trailer.c | 174 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 175 insertions(+)
 create mode 100644 t/unit-tests/t-trailer.c

diff --git a/Makefile b/Makefile
index d3a3f16f076..5418ddd03be 100644
--- a/Makefile
+++ b/Makefile
@@ -1347,6 +1347,7 @@ UNIT_TEST_PROGRAMS += t-ctype
 UNIT_TEST_PROGRAMS += t-mem-pool
 UNIT_TEST_PROGRAMS += t-prio-queue
 UNIT_TEST_PROGRAMS += t-strbuf
+UNIT_TEST_PROGRAMS += t-trailer
 UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o
diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
new file mode 100644
index 00000000000..c1f897235c7
--- /dev/null
+++ b/t/unit-tests/t-trailer.c
@@ -0,0 +1,174 @@
+#include "test-lib.h"
+#include "trailer.h"
+
+static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
+{
+	struct trailer_iterator iter;
+	size_t i = 0;
+
+	trailer_iterator_init(&iter, msg);
+	while (trailer_iterator_advance(&iter))
+		i++;
+	trailer_iterator_release(&iter);
+
+	check_uint(i, ==, num_expected_trailers);
+}
+
+static void run_t_trailer_iterator(void)
+{
+	static struct test_cases {
+		const char *name;
+		const char *msg;
+		size_t num_expected_trailers;
+	} tc[] = {
+		{
+			"empty input",
+			"",
+			0
+		},
+		{
+			"no newline at beginning",
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n",
+			0
+		},
+		{
+			"newline at beginning",
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n",
+			3
+		},
+		{
+			"without body text",
+			"subject: foo bar\n"
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n",
+			3
+		},
+		{
+			"with body text, without divider",
+			"my subject\n"
+			"\n"
+			"my body which is long\n"
+			"and contains some special\n"
+			"chars like : = ? !\n"
+			"hello\n"
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n"
+			"Signed-off-by: x\n",
+			4
+		},
+		{
+			"with body text, without divider (second trailer block)",
+			"my subject\n"
+			"\n"
+			"my body which is long\n"
+			"and contains some special\n"
+			"chars like : = ? !\n"
+			"hello\n"
+			"\n"
+			"Fixes: x\n"
+			"Acked-by: x\n"
+			"Reviewed-by: x\n"
+			"Signed-off-by: x\n"
+			"\n"
+			/*
+			 * Because this is the last trailer block, it takes
+			 * precedence over the first one encountered above.
+			 */
+			"Helped-by: x\n"
+			"Signed-off-by: x\n",
+			2
+		},
+		{
+			"with body text, with divider",
+			"my subject\n"
+			"\n"
+			"my body which is long\n"
+			"and contains some special\n"
+			"chars like : = ? !\n"
+			"hello\n"
+			"\n"
+			"---\n"
+			"\n"
+			/*
+			 * This trailer still counts because the iterator
+			 * always ignores the divider.
+			 */
+			"Signed-off-by: x\n",
+			1
+		},
+		{
+			"with non-trailer lines in trailer block",
+			"subject: foo bar\n"
+			"\n"
+			/*
+			 * Even though this trailer block has a non-trailer line
+			 * in it, it's still a valid trailer block because it's
+			 * at least 25% trailers and is Git-generated (see
+			 * git_generated_prefixes[] in trailer.c).
+			 */
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"Signed-off-by: x\n",
+			1
+		},
+		{
+			"with non-trailer lines (one too many) in trailer block",
+			"subject: foo bar\n"
+			"\n"
+			/*
+			 * This block has only 20% trailers, so it's below the
+			 * 25% threshold.
+			 */
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"not a trailer line\n"
+			"Signed-off-by: x\n",
+			0
+		},
+		{
+			"with non-trailer lines (only 1) in trailer block, but no Git-generated trailers",
+			"subject: foo bar\n"
+			"\n"
+			/*
+			 * This block has only 1 non-trailer out of 10 (IOW, 90%
+			 * trailers) but is not considered a trailer block
+			 * because the 25% threshold only applies to cases where
+			 * there was a Git-generated trailer.
+			 */
+			"Reviewed-by: x\n"
+			"Reviewed-by: x\n"
+			"Reviewed-by: x\n"
+			"Helped-by: x\n"
+			"Helped-by: x\n"
+			"Helped-by: x\n"
+			"Acked-by: x\n"
+			"Acked-by: x\n"
+			"Acked-by: x\n"
+			"not a trailer line\n",
+			0
+		},
+	};
+
+	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
+		TEST(t_trailer_iterator(tc[i].msg,
+					tc[i].num_expected_trailers),
+		     "%s", tc[i].name);
+	}
+}
+
+int cmd_main(int argc, const char **argv)
+{
+	run_t_trailer_iterator();
+	return test_done();
+}
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v4 03/10] trailer: teach iterator about non-trailer lines
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 01/10] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 02/10] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
@ 2024-05-02  4:54       ` Linus Arver via GitGitGadget
  2024-05-04 15:33         ` Phillip Wood
  2024-05-02  4:54       ` [PATCH v4 04/10] sequencer: use the trailer iterator Linus Arver via GitGitGadget
                         ` (7 subsequent siblings)
  10 siblings, 1 reply; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linus@ucla.edu>

Previously the iterator did not iterate over non-trailer lines. This was
somewhat unfortunate, because trailer blocks could have non-trailer
lines in them since 146245063e (trailer: allow non-trailers in trailer
block, 2016-10-21), which was before the iterator was created in
f0939a0eb1 (trailer: add interface for iterating over commit trailers,
2020-09-27).

So if trailer API users wanted to iterate over all lines in a trailer
block (including non-trailer lines), they could not use the iterator and
were forced to use the lower-level trailer_info struct directly (which
provides a raw string array that includes all lines in the trailer
block).

Change the iterator's behavior so that we also iterate over non-trailer
lines, instead of skipping over them. The new "raw" member of the
iterator allows API users to access previously inaccessible non-trailer
lines. Rename the variable "trailer" to just "line" because this
variable can now hold both trailer lines _and_ non-trailer lines.

The new "raw" member is important because anyone currently not using the
iterator is using trailer_info's raw string array directly to access
lines to check what the combined key + value looks like. If we didn't
provide a "raw" member here, iterator users would have to re-construct
the unparsed line by concatenating the key and value back together again
--- which places an undue burden on iterator users.

The next commit demonstrates the use of the iterator in sequencer.c as an
example of where "raw" will be useful, so that it can start using the
iterator.

For the existing use of the iterator in builtin/shortlog.c, we don't
have to change the code there because that code does

    trailer_iterator_init(&iter, body);
    while (trailer_iterator_advance(&iter)) {
        const char *value = iter.val.buf;

        if (!string_list_has_string(&log->trailers, iter.key.buf))
            continue;

        ...

and the

        if (!string_list_has_string(&log->trailers, iter.key.buf))

condition already skips over non-trailer lines (iter.key.buf holds the
entire line for non-trailer lines, which is not expected to match any
requested trailer key, so the comparison still works with this commit).

Rename "num_expected_trailers" to "num_expected" in
t/unit-tests/t-trailer.c because the items we iterate over now include
non-trailer lines.

Signed-off-by: Linus Arver <linus@ucla.edu>
---
 t/unit-tests/t-trailer.c | 16 +++++++++++-----
 trailer.c                | 12 +++++-------
 trailer.h                |  7 +++++++
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
index c1f897235c7..4f640d2a4b8 100644
--- a/t/unit-tests/t-trailer.c
+++ b/t/unit-tests/t-trailer.c
@@ -1,7 +1,7 @@
 #include "test-lib.h"
 #include "trailer.h"
 
-static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
+static void t_trailer_iterator(const char *msg, size_t num_expected)
 {
 	struct trailer_iterator iter;
 	size_t i = 0;
@@ -11,7 +11,7 @@ static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
 		i++;
 	trailer_iterator_release(&iter);
 
-	check_uint(i, ==, num_expected_trailers);
+	check_uint(i, ==, num_expected);
 }
 
 static void run_t_trailer_iterator(void)
@@ -19,7 +19,7 @@ static void run_t_trailer_iterator(void)
 	static struct test_cases {
 		const char *name;
 		const char *msg;
-		size_t num_expected_trailers;
+		size_t num_expected;
 	} tc[] = {
 		{
 			"empty input",
@@ -119,7 +119,13 @@ static void run_t_trailer_iterator(void)
 			"not a trailer line\n"
 			"not a trailer line\n"
 			"Signed-off-by: x\n",
-			1
+			/*
+			 * Even though there is only really 1 real "trailer"
+			 * (Signed-off-by), we still have 4 trailer objects
+			 * because we still want to iterate through the entire
+			 * block.
+			 */
+			4
 		},
 		{
 			"with non-trailer lines (one too many) in trailer block",
@@ -162,7 +168,7 @@ static void run_t_trailer_iterator(void)
 
 	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
 		TEST(t_trailer_iterator(tc[i].msg,
-					tc[i].num_expected_trailers),
+					tc[i].num_expected),
 		     "%s", tc[i].name);
 	}
 }
diff --git a/trailer.c b/trailer.c
index 3e4dab9c065..4700c441442 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1146,17 +1146,15 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	while (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
-		int separator_pos = find_separator(trailer, separators);
-
-		if (separator_pos < 1)
-			continue; /* not a real trailer */
+	if (iter->internal.cur < iter->internal.info.trailer_nr) {
+		char *line = iter->internal.info.trailers[iter->internal.cur++];
+		int separator_pos = find_separator(line, separators);
 
+		iter->raw = line;
 		strbuf_reset(&iter->key);
 		strbuf_reset(&iter->val);
 		parse_trailer(&iter->key, &iter->val, NULL,
-			      trailer, separator_pos);
+			      line, separator_pos);
 		/* Always unfold values during iteration. */
 		unfold_value(&iter->val);
 		return 1;
diff --git a/trailer.h b/trailer.h
index 9f42aa75994..7e36da7d13c 100644
--- a/trailer.h
+++ b/trailer.h
@@ -125,6 +125,13 @@ void format_trailers_from_commit(const struct process_trailer_options *,
  *   trailer_iterator_release(&iter);
  */
 struct trailer_iterator {
+	/*
+	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
+	 * key/val pair as part of a trailer block (as the "key" and "val"
+	 * fields below). If a line fails to parse as a trailer, then the "key"
+	 * will be the entire line and "val" will be the empty string.
+	 */
+	const char *raw;
 	struct strbuf key;
 	struct strbuf val;
 
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v4 04/10] sequencer: use the trailer iterator
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
                         ` (2 preceding siblings ...)
  2024-05-02  4:54       ` [PATCH v4 03/10] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
@ 2024-05-02  4:54       ` Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 05/10] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
                         ` (6 subsequent siblings)
  10 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linus@ucla.edu>

Instead of calling "trailer_info_get()", which is a low-level function
in the trailers implementation (trailer.c), call
trailer_iterator_advance(), which was specifically designed for public
consumption in f0939a0eb1 (trailer: add interface for iterating over
commit trailers, 2020-09-27).

Avoiding "trailer_info_get()" means we don't have to worry about options
like "no_divider" (relevant for parsing trailers). We also don't have to
check for things like "info.trailer_start == info.trailer_end" to see
whether there were any trailers (instead we can just check to see
whether the iterator advanced at all).

Note how we have to use "iter.raw" in order to get the same behavior as
before when we iterated over the unparsed string array (char **trailers)
in trailer_info.

Signed-off-by: Linus Arver <linus@ucla.edu>
---
 sequencer.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/sequencer.c b/sequencer.c
index ea1441e6174..4c1f6c675e7 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -319,35 +319,32 @@ static const char *get_todo_path(const struct replay_opts *opts)
 static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob,
 	size_t ignore_footer)
 {
-	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
-	struct trailer_info info;
-	size_t i;
+	struct trailer_iterator iter;
+	size_t i = 0;
 	int found_sob = 0, found_sob_last = 0;
 	char saved_char;
 
-	opts.no_divider = 1;
-
 	if (ignore_footer) {
 		saved_char = sb->buf[sb->len - ignore_footer];
 		sb->buf[sb->len - ignore_footer] = '\0';
 	}
 
-	trailer_info_get(&opts, sb->buf, &info);
+	trailer_iterator_init(&iter, sb->buf);
 
 	if (ignore_footer)
 		sb->buf[sb->len - ignore_footer] = saved_char;
 
-	if (info.trailer_block_start == info.trailer_block_end)
-		return 0;
+	while (trailer_iterator_advance(&iter)) {
+		i++;
+		if (sob && !strncmp(iter.raw, sob->buf, sob->len))
+			found_sob = i;
+	}
+	trailer_iterator_release(&iter);
 
-	for (i = 0; i < info.trailer_nr; i++)
-		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
-			found_sob = 1;
-			if (i == info.trailer_nr - 1)
-				found_sob_last = 1;
-		}
+	if (!i)
+		return 0;
 
-	trailer_info_release(&info);
+	found_sob_last = (int)i == found_sob;
 
 	if (found_sob_last)
 		return 3;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v4 05/10] interpret-trailers: access trailer_info with new helpers
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
                         ` (3 preceding siblings ...)
  2024-05-02  4:54       ` [PATCH v4 04/10] sequencer: use the trailer iterator Linus Arver via GitGitGadget
@ 2024-05-02  4:54       ` Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 06/10] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
                         ` (5 subsequent siblings)
  10 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linus@ucla.edu>

Instead of directly accessing trailer_info members, access them
indirectly through new helper functions exposed by the trailer API.

This is the first of two preparatory commits which will allow us to
use the so-called "pimpl" (pointer to implementation) idiom for the
trailer API, by making the trailer_info struct private to the trailer
implementation (and thus hidden from the API).

Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linus@ucla.edu>
---
 builtin/interpret-trailers.c | 12 ++++++------
 trailer.c                    | 21 +++++++++++++++++++++
 trailer.h                    |  4 ++++
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c
index 11f4ce9e4a2..f3240682e35 100644
--- a/builtin/interpret-trailers.c
+++ b/builtin/interpret-trailers.c
@@ -141,7 +141,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	LIST_HEAD(head);
 	struct strbuf sb = STRBUF_INIT;
 	struct strbuf trailer_block = STRBUF_INIT;
-	struct trailer_info info;
+	struct trailer_info *info = trailer_info_new();
 	FILE *outfile = stdout;
 
 	trailer_config_init();
@@ -151,13 +151,13 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	if (opts->in_place)
 		outfile = create_in_place_tempfile(file);
 
-	parse_trailers(opts, &info, sb.buf, &head);
+	parse_trailers(opts, info, sb.buf, &head);
 
 	/* Print the lines before the trailers */
 	if (!opts->only_trailers)
-		fwrite(sb.buf, 1, info.trailer_block_start, outfile);
+		fwrite(sb.buf, 1, trailer_block_start(info), outfile);
 
-	if (!opts->only_trailers && !info.blank_line_before_trailer)
+	if (!opts->only_trailers && !blank_line_before_trailer_block(info))
 		fprintf(outfile, "\n");
 
 
@@ -178,8 +178,8 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 
 	/* Print the lines after the trailers as is */
 	if (!opts->only_trailers)
-		fwrite(sb.buf + info.trailer_block_end, 1, sb.len - info.trailer_block_end, outfile);
-	trailer_info_release(&info);
+		fwrite(sb.buf + trailer_block_end(info), 1, sb.len - trailer_block_end(info), outfile);
+	trailer_info_release(info);
 
 	if (opts->in_place)
 		if (rename_tempfile(&trailers_tempfile, file))
diff --git a/trailer.c b/trailer.c
index 4700c441442..95b4c9b8f19 100644
--- a/trailer.c
+++ b/trailer.c
@@ -952,6 +952,12 @@ static void unfold_value(struct strbuf *val)
 	strbuf_release(&out);
 }
 
+struct trailer_info *trailer_info_new(void)
+{
+	struct trailer_info *info = xcalloc(1, sizeof(*info));
+	return info;
+}
+
 /*
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
@@ -1000,6 +1006,21 @@ void free_trailers(struct list_head *trailers)
 	}
 }
 
+size_t trailer_block_start(struct trailer_info *info)
+{
+	return info->trailer_block_start;
+}
+
+size_t trailer_block_end(struct trailer_info *info)
+{
+	return info->trailer_block_end;
+}
+
+int blank_line_before_trailer_block(struct trailer_info *info)
+{
+	return info->blank_line_before_trailer;
+}
+
 void trailer_info_get(const struct process_trailer_options *opts,
 		      const char *str,
 		      struct trailer_info *info)
diff --git a/trailer.h b/trailer.h
index 7e36da7d13c..9ba96721398 100644
--- a/trailer.h
+++ b/trailer.h
@@ -97,6 +97,10 @@ void parse_trailers(const struct process_trailer_options *,
 void trailer_info_get(const struct process_trailer_options *,
 		      const char *str,
 		      struct trailer_info *);
+size_t trailer_block_start(struct trailer_info *);
+size_t trailer_block_end(struct trailer_info *);
+int blank_line_before_trailer_block(struct trailer_info *);
+struct trailer_info *trailer_info_new(void);
 
 void trailer_info_release(struct trailer_info *info);
 
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v4 06/10] trailer: make parse_trailers() return trailer_info pointer
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
                         ` (4 preceding siblings ...)
  2024-05-02  4:54       ` [PATCH v4 05/10] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
@ 2024-05-02  4:54       ` Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 07/10] trailer: make trailer_info struct private Linus Arver via GitGitGadget
                         ` (4 subsequent siblings)
  10 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linus@ucla.edu>

This is the second and final preparatory commit for making the
trailer_info struct private to the trailer implementation.

Make trailer_info_get() do the actual work of allocating a new
trailer_info struct, and return a pointer to it. Because
parse_trailers() wraps around trailer_info_get(), it too can return this
pointer to the caller. From the trailer API user's perspective, the call
to trailer_info_new() can be replaced with parse_trailers(); do so in
interpret-trailers.

Because trailer_info_new() is no longer called by interpret-trailers,
remove this function from the trailer API.

With this change, we no longer allocate trailer_info on the stack ---
all uses of it are via a pointer where the actual data is always
allocated at runtime through trailer_info_new(). Make
trailer_info_release() free this dynamically allocated memory.

Finally, due to the way the function signatures of parse_trailers() and
trailer_info_get() have changed, update the callsites in
format_trailers_from_commit() and trailer_iterator_init() accordingly.

Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linus@ucla.edu>
---
 builtin/interpret-trailers.c |  4 ++--
 trailer.c                    | 41 +++++++++++++++++++-----------------
 trailer.h                    | 17 ++++++---------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c
index f3240682e35..6bf8cec005a 100644
--- a/builtin/interpret-trailers.c
+++ b/builtin/interpret-trailers.c
@@ -141,7 +141,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	LIST_HEAD(head);
 	struct strbuf sb = STRBUF_INIT;
 	struct strbuf trailer_block = STRBUF_INIT;
-	struct trailer_info *info = trailer_info_new();
+	struct trailer_info *info;
 	FILE *outfile = stdout;
 
 	trailer_config_init();
@@ -151,7 +151,7 @@ static void interpret_trailers(const struct process_trailer_options *opts,
 	if (opts->in_place)
 		outfile = create_in_place_tempfile(file);
 
-	parse_trailers(opts, info, sb.buf, &head);
+	info = parse_trailers(opts, sb.buf, &head);
 
 	/* Print the lines before the trailers */
 	if (!opts->only_trailers)
diff --git a/trailer.c b/trailer.c
index 95b4c9b8f19..9179dd802c6 100644
--- a/trailer.c
+++ b/trailer.c
@@ -952,7 +952,7 @@ static void unfold_value(struct strbuf *val)
 	strbuf_release(&out);
 }
 
-struct trailer_info *trailer_info_new(void)
+static struct trailer_info *trailer_info_new(void)
 {
 	struct trailer_info *info = xcalloc(1, sizeof(*info));
 	return info;
@@ -962,16 +962,16 @@ struct trailer_info *trailer_info_new(void)
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
  */
-void parse_trailers(const struct process_trailer_options *opts,
-		    struct trailer_info *info,
-		    const char *str,
-		    struct list_head *head)
+struct trailer_info *parse_trailers(const struct process_trailer_options *opts,
+				    const char *str,
+				    struct list_head *head)
 {
+	struct trailer_info *info;
 	struct strbuf tok = STRBUF_INIT;
 	struct strbuf val = STRBUF_INIT;
 	size_t i;
 
-	trailer_info_get(opts, str, info);
+	info = trailer_info_get(opts, str);
 
 	for (i = 0; i < info->trailer_nr; i++) {
 		int separator_pos;
@@ -995,6 +995,8 @@ void parse_trailers(const struct process_trailer_options *opts,
 					 strbuf_detach(&val, NULL));
 		}
 	}
+
+	return info;
 }
 
 void free_trailers(struct list_head *trailers)
@@ -1021,10 +1023,10 @@ int blank_line_before_trailer_block(struct trailer_info *info)
 	return info->blank_line_before_trailer;
 }
 
-void trailer_info_get(const struct process_trailer_options *opts,
-		      const char *str,
-		      struct trailer_info *info)
+struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
+				      const char *str)
 {
+	struct trailer_info *info = trailer_info_new();
 	size_t end_of_log_message = 0, trailer_block_start = 0;
 	struct strbuf **trailer_lines, **ptr;
 	char **trailer_strings = NULL;
@@ -1063,6 +1065,8 @@ void trailer_info_get(const struct process_trailer_options *opts,
 	info->trailer_block_end = end_of_log_message;
 	info->trailers = trailer_strings;
 	info->trailer_nr = nr;
+
+	return info;
 }
 
 void trailer_info_release(struct trailer_info *info)
@@ -1071,6 +1075,7 @@ void trailer_info_release(struct trailer_info *info)
 	for (i = 0; i < info->trailer_nr; i++)
 		free(info->trailers[i]);
 	free(info->trailers);
+	free(info);
 }
 
 void format_trailers(const struct process_trailer_options *opts,
@@ -1138,21 +1143,19 @@ void format_trailers_from_commit(const struct process_trailer_options *opts,
 				 struct strbuf *out)
 {
 	LIST_HEAD(trailer_objects);
-	struct trailer_info info;
-
-	parse_trailers(opts, &info, msg, &trailer_objects);
+	struct trailer_info *info = parse_trailers(opts, msg, &trailer_objects);
 
 	/* If we want the whole block untouched, we can take the fast path. */
 	if (!opts->only_trailers && !opts->unfold && !opts->filter &&
 	    !opts->separator && !opts->key_only && !opts->value_only &&
 	    !opts->key_value_separator) {
-		strbuf_add(out, msg + info.trailer_block_start,
-			   info.trailer_block_end - info.trailer_block_start);
+		strbuf_add(out, msg + info->trailer_block_start,
+			   info->trailer_block_end - info->trailer_block_start);
 	} else
 		format_trailers(opts, &trailer_objects, out);
 
 	free_trailers(&trailer_objects);
-	trailer_info_release(&info);
+	trailer_info_release(info);
 }
 
 void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
@@ -1161,14 +1164,14 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 	strbuf_init(&iter->key, 0);
 	strbuf_init(&iter->val, 0);
 	opts.no_divider = 1;
-	trailer_info_get(&opts, msg, &iter->internal.info);
+	iter->internal.info = trailer_info_get(&opts, msg);
 	iter->internal.cur = 0;
 }
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	if (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *line = iter->internal.info.trailers[iter->internal.cur++];
+	if (iter->internal.cur < iter->internal.info->trailer_nr) {
+		char *line = iter->internal.info->trailers[iter->internal.cur++];
 		int separator_pos = find_separator(line, separators);
 
 		iter->raw = line;
@@ -1185,7 +1188,7 @@ int trailer_iterator_advance(struct trailer_iterator *iter)
 
 void trailer_iterator_release(struct trailer_iterator *iter)
 {
-	trailer_info_release(&iter->internal.info);
+	trailer_info_release(iter->internal.info);
 	strbuf_release(&iter->val);
 	strbuf_release(&iter->key);
 }
diff --git a/trailer.h b/trailer.h
index 9ba96721398..c8c0018c54d 100644
--- a/trailer.h
+++ b/trailer.h
@@ -89,18 +89,15 @@ void parse_trailers_from_command_line_args(struct list_head *arg_head,
 void process_trailers_lists(struct list_head *head,
 			    struct list_head *arg_head);
 
-void parse_trailers(const struct process_trailer_options *,
-		    struct trailer_info *,
-		    const char *str,
-		    struct list_head *head);
-
-void trailer_info_get(const struct process_trailer_options *,
-		      const char *str,
-		      struct trailer_info *);
+struct trailer_info *parse_trailers(const struct process_trailer_options *,
+				    const char *str,
+				    struct list_head *head);
+struct trailer_info *trailer_info_get(const struct process_trailer_options *,
+				      const char *str);
+
 size_t trailer_block_start(struct trailer_info *);
 size_t trailer_block_end(struct trailer_info *);
 int blank_line_before_trailer_block(struct trailer_info *);
-struct trailer_info *trailer_info_new(void);
 
 void trailer_info_release(struct trailer_info *info);
 
@@ -141,7 +138,7 @@ struct trailer_iterator {
 
 	/* private */
 	struct {
-		struct trailer_info info;
+		struct trailer_info *info;
 		size_t cur;
 	} internal;
 };
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v4 07/10] trailer: make trailer_info struct private
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
                         ` (5 preceding siblings ...)
  2024-05-02  4:54       ` [PATCH v4 06/10] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
@ 2024-05-02  4:54       ` Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 08/10] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
                         ` (3 subsequent siblings)
  10 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linus@ucla.edu>

In 13211ae23f (trailer: separate public from internal portion of
trailer_iterator, 2023-09-09) we moved trailer_info behind an anonymous
struct to discourage use by trailer.h API users. However, it still left
open the possibility of external use of trailer_info itself. Now that
there are no external users of trailer_info, we can make this struct
private.

Make this struct private by putting its definition inside trailer.c.
This has two benefits:

  (1) it makes the surface area of the public facing
      interface (trailer.h) smaller, and

  (2) external API users are unable to peer inside this struct (because
      it is only ever exposed as an opaque pointer).

There are a few disadvantages:

  (A) every time the member of the struct is accessed an extra pointer
      dereference must be done, and

  (B) for users of trailer_info outside trailer.c, this struct can no
      longer be allocated on the stack and may only be allocated on the
      heap (because its definition is hidden away in trailer.c) and
      appropriately deallocated by the user, and

  (C) without good documentation on the API, the opaque struct is
      hostile to programmers because it goes against the "Show me your
      data structures, and I won't usually need your code; it'll
      be obvious." mantra [2].

(The disadvantages have already been observed in the two preparatory
commits that precede this one.) This commit believes that the benefits
outweigh the disadvantages for designing APIs, as explained below.

Making trailer_info private exposes existing deficiencies in the API.
This is because users of this struct had full access to its internals,
so there wasn't much need to actually design it to be "complete" in the
sense that API users only needed to use what was provided by the API.
For example, the location of the trailer block (start/end offsets
relative to the start of the input text) was accessible by looking at
these struct members directly. Now that the struct is private, we have
to expose new API functions to allow clients to access this
information (see builtin/interpret-trailers.c).

The idea in this commit to hide implementation details behind an "opaque
pointer" is also known as the "pimpl" (pointer to implementation) idiom
in C++ and is a common pattern in that language (where, for example,
abstract classes only have pointers to concrete classes).

However, the original inspiration to use this idiom does not come from
C++, but instead from the book "C Interfaces and Implementations:
Techniques for Creating Reusable Software" [1]. This book recommends
opaque pointers as a good design principle for designing C libraries,
using the term "interface" for the functions declared in *.h (header)
files and "implementation" for the corresponding *.c files which define
those functions.

The book says this about opaque pointers:

    ... clients can manipulate such pointers freely, but they can’t
    dereference them; that is, they can’t look at the innards of the
    structure pointed to by them. Only the implementation has that
    privilege. Opaque pointers hide representation details and help
    catch errors.

In our case, "struct trailer_info" is now hidden from clients, and the
ways in which this opaque pointer can be used are limited by the richness
of <trailer.h>. In other words, <trailer.h> exclusively controls exactly
how "trailer_info" pointers are to be used.

[1] Hanson, David R. "C Interfaces and Implementations: Techniques for
    Creating Reusable Software". Addison Wesley, 1997. p. 22

[2] Raymond, Eric S. "The Cathedral and the Bazaar: Musings on Linux and
    Open Source by an Accidental Revolutionary". O'Reilly, 1999.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Linus Arver <linus@ucla.edu>
---
 trailer.c | 21 +++++++++++++++++++++
 trailer.h | 23 ++---------------------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/trailer.c b/trailer.c
index 9179dd802c6..6167b707ae0 100644
--- a/trailer.c
+++ b/trailer.c
@@ -11,6 +11,27 @@
  * Copyright (c) 2013, 2014 Christian Couder <chriscool@tuxfamily.org>
  */
 
+struct trailer_info {
+	/*
+	 * True if there is a blank line before the location pointed to by
+	 * trailer_block_start.
+	 */
+	int blank_line_before_trailer;
+
+	/*
+	 * Offsets to the trailer block start and end positions in the input
+	 * string. If no trailer block is found, these are both set to the
+	 * "true" end of the input (find_end_of_log_message()).
+	 */
+	size_t trailer_block_start, trailer_block_end;
+
+	/*
+	 * Array of trailers found.
+	 */
+	char **trailers;
+	size_t trailer_nr;
+};
+
 struct conf_info {
 	char *name;
 	char *key;
diff --git a/trailer.h b/trailer.h
index c8c0018c54d..4e0a6789d7f 100644
--- a/trailer.h
+++ b/trailer.h
@@ -4,6 +4,8 @@
 #include "list.h"
 #include "strbuf.h"
 
+struct trailer_info;
+
 enum trailer_where {
 	WHERE_DEFAULT,
 	WHERE_END,
@@ -29,27 +31,6 @@ int trailer_set_where(enum trailer_where *item, const char *value);
 int trailer_set_if_exists(enum trailer_if_exists *item, const char *value);
 int trailer_set_if_missing(enum trailer_if_missing *item, const char *value);
 
-struct trailer_info {
-	/*
-	 * True if there is a blank line before the location pointed to by
-	 * trailer_block_start.
-	 */
-	int blank_line_before_trailer;
-
-	/*
-	 * Offsets to the trailer block start and end positions in the input
-	 * string. If no trailer block is found, these are both set to the
-	 * "true" end of the input (find_end_of_log_message()).
-	 */
-	size_t trailer_block_start, trailer_block_end;
-
-	/*
-	 * Array of trailers found.
-	 */
-	char **trailers;
-	size_t trailer_nr;
-};
-
 /*
  * A list that represents newly-added trailers, such as those provided
  * with the --trailer command line option of git-interpret-trailers.
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v4 08/10] trailer: retire trailer_info_get() from API
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
                         ` (6 preceding siblings ...)
  2024-05-02  4:54       ` [PATCH v4 07/10] trailer: make trailer_info struct private Linus Arver via GitGitGadget
@ 2024-05-02  4:54       ` Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 09/10] trailer: document parse_trailers() usage Linus Arver via GitGitGadget
                         ` (2 subsequent siblings)
  10 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linus@ucla.edu>

Make trailer_info_get() "static" to be file-scoped to trailer.c, because
no one outside of trailer.c uses it. Remove its declaration from
<trailer.h>.

We have to also reposition it to be above parse_trailers(), which
depends on it.

Signed-off-by: Linus Arver <linus@ucla.edu>
---
 trailer.c | 92 +++++++++++++++++++++++++++----------------------------
 trailer.h |  2 --
 2 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/trailer.c b/trailer.c
index 6167b707ae0..33b6aa7e8bd 100644
--- a/trailer.c
+++ b/trailer.c
@@ -979,6 +979,52 @@ static struct trailer_info *trailer_info_new(void)
 	return info;
 }
 
+static struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
+					     const char *str)
+{
+	struct trailer_info *info = trailer_info_new();
+	size_t end_of_log_message = 0, trailer_block_start = 0;
+	struct strbuf **trailer_lines, **ptr;
+	char **trailer_strings = NULL;
+	size_t nr = 0, alloc = 0;
+	char **last = NULL;
+
+	trailer_config_init();
+
+	end_of_log_message = find_end_of_log_message(str, opts->no_divider);
+	trailer_block_start = find_trailer_block_start(str, end_of_log_message);
+
+	trailer_lines = strbuf_split_buf(str + trailer_block_start,
+					 end_of_log_message - trailer_block_start,
+					 '\n',
+					 0);
+	for (ptr = trailer_lines; *ptr; ptr++) {
+		if (last && isspace((*ptr)->buf[0])) {
+			struct strbuf sb = STRBUF_INIT;
+			strbuf_attach(&sb, *last, strlen(*last), strlen(*last));
+			strbuf_addbuf(&sb, *ptr);
+			*last = strbuf_detach(&sb, NULL);
+			continue;
+		}
+		ALLOC_GROW(trailer_strings, nr + 1, alloc);
+		trailer_strings[nr] = strbuf_detach(*ptr, NULL);
+		last = find_separator(trailer_strings[nr], separators) >= 1
+			? &trailer_strings[nr]
+			: NULL;
+		nr++;
+	}
+	strbuf_list_free(trailer_lines);
+
+	info->blank_line_before_trailer = ends_with_blank_line(str,
+							       trailer_block_start);
+	info->trailer_block_start = trailer_block_start;
+	info->trailer_block_end = end_of_log_message;
+	info->trailers = trailer_strings;
+	info->trailer_nr = nr;
+
+	return info;
+}
+
 /*
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
@@ -1044,52 +1090,6 @@ int blank_line_before_trailer_block(struct trailer_info *info)
 	return info->blank_line_before_trailer;
 }
 
-struct trailer_info *trailer_info_get(const struct process_trailer_options *opts,
-				      const char *str)
-{
-	struct trailer_info *info = trailer_info_new();
-	size_t end_of_log_message = 0, trailer_block_start = 0;
-	struct strbuf **trailer_lines, **ptr;
-	char **trailer_strings = NULL;
-	size_t nr = 0, alloc = 0;
-	char **last = NULL;
-
-	trailer_config_init();
-
-	end_of_log_message = find_end_of_log_message(str, opts->no_divider);
-	trailer_block_start = find_trailer_block_start(str, end_of_log_message);
-
-	trailer_lines = strbuf_split_buf(str + trailer_block_start,
-					 end_of_log_message - trailer_block_start,
-					 '\n',
-					 0);
-	for (ptr = trailer_lines; *ptr; ptr++) {
-		if (last && isspace((*ptr)->buf[0])) {
-			struct strbuf sb = STRBUF_INIT;
-			strbuf_attach(&sb, *last, strlen(*last), strlen(*last));
-			strbuf_addbuf(&sb, *ptr);
-			*last = strbuf_detach(&sb, NULL);
-			continue;
-		}
-		ALLOC_GROW(trailer_strings, nr + 1, alloc);
-		trailer_strings[nr] = strbuf_detach(*ptr, NULL);
-		last = find_separator(trailer_strings[nr], separators) >= 1
-			? &trailer_strings[nr]
-			: NULL;
-		nr++;
-	}
-	strbuf_list_free(trailer_lines);
-
-	info->blank_line_before_trailer = ends_with_blank_line(str,
-							       trailer_block_start);
-	info->trailer_block_start = trailer_block_start;
-	info->trailer_block_end = end_of_log_message;
-	info->trailers = trailer_strings;
-	info->trailer_nr = nr;
-
-	return info;
-}
-
 void trailer_info_release(struct trailer_info *info)
 {
 	size_t i;
diff --git a/trailer.h b/trailer.h
index 4e0a6789d7f..b0ec7658c67 100644
--- a/trailer.h
+++ b/trailer.h
@@ -73,8 +73,6 @@ void process_trailers_lists(struct list_head *head,
 struct trailer_info *parse_trailers(const struct process_trailer_options *,
 				    const char *str,
 				    struct list_head *head);
-struct trailer_info *trailer_info_get(const struct process_trailer_options *,
-				      const char *str);
 
 size_t trailer_block_start(struct trailer_info *);
 size_t trailer_block_end(struct trailer_info *);
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v4 09/10] trailer: document parse_trailers() usage
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
                         ` (7 preceding siblings ...)
  2024-05-02  4:54       ` [PATCH v4 08/10] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
@ 2024-05-02  4:54       ` Linus Arver via GitGitGadget
  2024-05-02  4:54       ` [PATCH v4 10/10] trailer unit tests: inspect iterator contents Linus Arver via GitGitGadget
  2024-05-02 17:15       ` [PATCH v4 00/10] Make trailer_info struct private (plus sequencer cleanup) Junio C Hamano
  10 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linus@ucla.edu>

Explain how to use parse_trailers(), because earlier we made the
trailer_info struct opaque. That is, because clients can no longer peek
inside it, we should give them guidance about how the (pointer to the)
opaque struct can still be useful to them.

Rename "head" struct to "trailer_objects" to make the wording of the new
comments a bit easier to read (because "head" itself doesn't really have
any domain-specific meaning here).

Signed-off-by: Linus Arver <linus@ucla.edu>
---
 trailer.c |  8 ++++----
 trailer.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/trailer.c b/trailer.c
index 33b6aa7e8bd..406745264aa 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1026,12 +1026,12 @@ static struct trailer_info *trailer_info_get(const struct process_trailer_option
 }
 
 /*
- * Parse trailers in "str", populating the trailer info and "head"
+ * Parse trailers in "str", populating the trailer info and "trailer_objects"
  * linked list structure.
  */
 struct trailer_info *parse_trailers(const struct process_trailer_options *opts,
 				    const char *str,
-				    struct list_head *head)
+				    struct list_head *trailer_objects)
 {
 	struct trailer_info *info;
 	struct strbuf tok = STRBUF_INIT;
@@ -1051,13 +1051,13 @@ struct trailer_info *parse_trailers(const struct process_trailer_options *opts,
 				      separator_pos);
 			if (opts->unfold)
 				unfold_value(&val);
-			add_trailer_item(head,
+			add_trailer_item(trailer_objects,
 					 strbuf_detach(&tok, NULL),
 					 strbuf_detach(&val, NULL));
 		} else if (!opts->only_trailers) {
 			strbuf_addstr(&val, trailer);
 			strbuf_strip_suffix(&val, "\n");
-			add_trailer_item(head,
+			add_trailer_item(trailer_objects,
 					 NULL,
 					 strbuf_detach(&val, NULL));
 		}
diff --git a/trailer.h b/trailer.h
index b0ec7658c67..82104912d70 100644
--- a/trailer.h
+++ b/trailer.h
@@ -70,14 +70,63 @@ void parse_trailers_from_command_line_args(struct list_head *arg_head,
 void process_trailers_lists(struct list_head *head,
 			    struct list_head *arg_head);
 
+/*
+ * Given some input string "str", return a pointer to an opaque trailer_info
+ * structure. Also populate the trailer_objects list with parsed trailer
+ * objects. Internally this calls trailer_info_get() to get the opaque pointer,
+ * but does some extra work to populate the trailer_objects linked list.
+ *
+ * The opaque trailer_info pointer can be used to check the position of the
+ * trailer block as offsets relative to the beginning of "str" in
+ * trailer_block_start() and trailer_block_end().
+ * blank_line_before_trailer_block() returns 1 if there is a blank line just
+ * before the trailer block. All of these functions are useful for preserving
+ * the input before and after the trailer block, if we were to write out the
+ * original input (but with the trailer block itself modified); see
+ * builtin/interpret-trailers.c for an example.
+ *
+ * For iterating through the parsed trailer block (if you don't care about the
+ * position of the trailer block itself in the context of the larger string text
+ * from which it was parsed), please see trailer_iterator_init() which uses the
+ * trailer_info struct internally.
+ *
+ * Lastly, callers should call trailer_info_release() when they are done using
+ * the opaque pointer.
+ *
+ * NOTE: Callers should treat both trailer_info and trailer_objects as
+ * read-only items, because there is some overlap between the two (trailer_info
+ * has "char **trailers" string array, and trailer_objects will have the same
+ * data but as a linked list of trailer_item objects). This API does not perform
+ * any synchronization between the two. In the future we should be able to
+ * reduce the duplication and use just the linked list.
+ */
 struct trailer_info *parse_trailers(const struct process_trailer_options *,
 				    const char *str,
-				    struct list_head *head);
+				    struct list_head *trailer_objects);
 
+/*
+ * Return the offset of the start of the trailer block. That is, 0 is the start
+ * of the input ("str" in parse_trailers()) and some other positive number
+ * indicates how many bytes we have to skip over before we get to the beginning
+ * of the trailer block.
+ */
 size_t trailer_block_start(struct trailer_info *);
+
+/*
+ * Return the end of the trailer block, again relative to the start of the
+ * input.
+ */
 size_t trailer_block_end(struct trailer_info *);
+
+/*
+ * Return 1 if the trailer block had an extra newline (blank line) just before
+ * it.
+ */
 int blank_line_before_trailer_block(struct trailer_info *);
 
+/*
+ * Free trailer_info struct.
+ */
 void trailer_info_release(struct trailer_info *info);
 
 void trailer_config_init(void);
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH v4 10/10] trailer unit tests: inspect iterator contents
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
                         ` (8 preceding siblings ...)
  2024-05-02  4:54       ` [PATCH v4 09/10] trailer: document parse_trailers() usage Linus Arver via GitGitGadget
@ 2024-05-02  4:54       ` Linus Arver via GitGitGadget
  2024-05-02 17:15       ` [PATCH v4 00/10] Make trailer_info struct private (plus sequencer cleanup) Junio C Hamano
  10 siblings, 0 replies; 67+ messages in thread
From: Linus Arver via GitGitGadget @ 2024-05-02  4:54 UTC (permalink / raw)
  To: git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver, Linus Arver, Linus Arver

From: Linus Arver <linus@ucla.edu>

Previously we only checked whether we would iterate a certain (expected)
number of times.

Also check the parsed "raw", "key" and "val" fields during each
iteration.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Linus Arver <linus@ucla.edu>
---
 t/unit-tests/t-trailer.c | 161 +++++++++++++++++++++++++++++++++++----
 1 file changed, 148 insertions(+), 13 deletions(-)

diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
index 4f640d2a4b8..2ecca359d96 100644
--- a/t/unit-tests/t-trailer.c
+++ b/t/unit-tests/t-trailer.c
@@ -1,14 +1,27 @@
 #include "test-lib.h"
 #include "trailer.h"
 
-static void t_trailer_iterator(const char *msg, size_t num_expected)
+struct contents {
+	const char *raw;
+	const char *key;
+	const char *val;
+};
+
+static void t_trailer_iterator(const char *msg, size_t num_expected,
+			       struct contents *contents)
 {
 	struct trailer_iterator iter;
 	size_t i = 0;
 
 	trailer_iterator_init(&iter, msg);
-	while (trailer_iterator_advance(&iter))
+	while (trailer_iterator_advance(&iter)) {
+		if (num_expected) {
+			check_str(iter.raw, contents[i].raw);
+			check_str(iter.key.buf, contents[i].key);
+			check_str(iter.val.buf, contents[i].val);
+		}
 		i++;
+	}
 	trailer_iterator_release(&iter);
 
 	check_uint(i, ==, num_expected);
@@ -16,22 +29,26 @@ static void t_trailer_iterator(const char *msg, size_t num_expected)
 
 static void run_t_trailer_iterator(void)
 {
+
 	static struct test_cases {
 		const char *name;
 		const char *msg;
 		size_t num_expected;
+		struct contents contents[10];
 	} tc[] = {
 		{
 			"empty input",
 			"",
-			0
+			0,
+			{{0}},
 		},
 		{
 			"no newline at beginning",
 			"Fixes: x\n"
 			"Acked-by: x\n"
 			"Reviewed-by: x\n",
-			0
+			0,
+			{{0}},
 		},
 		{
 			"newline at beginning",
@@ -39,7 +56,27 @@ static void run_t_trailer_iterator(void)
 			"Fixes: x\n"
 			"Acked-by: x\n"
 			"Reviewed-by: x\n",
-			3
+			3,
+			{
+				{
+					.raw = "Fixes: x\n",
+					.key = "Fixes",
+					.val = "x",
+				},
+				{
+					.raw = "Acked-by: x\n",
+					.key = "Acked-by",
+					.val = "x",
+				},
+				{
+					.raw = "Reviewed-by: x\n",
+					.key = "Reviewed-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"without body text",
@@ -48,7 +85,27 @@ static void run_t_trailer_iterator(void)
 			"Fixes: x\n"
 			"Acked-by: x\n"
 			"Reviewed-by: x\n",
-			3
+			3,
+			{
+				{
+					.raw = "Fixes: x\n",
+					.key = "Fixes",
+					.val = "x",
+				},
+				{
+					.raw = "Acked-by: x\n",
+					.key = "Acked-by",
+					.val = "x",
+				},
+				{
+					.raw = "Reviewed-by: x\n",
+					.key = "Reviewed-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"with body text, without divider",
@@ -63,7 +120,32 @@ static void run_t_trailer_iterator(void)
 			"Acked-by: x\n"
 			"Reviewed-by: x\n"
 			"Signed-off-by: x\n",
-			4
+			4,
+			{
+				{
+					.raw = "Fixes: x\n",
+					.key = "Fixes",
+					.val = "x",
+				},
+				{
+					.raw = "Acked-by: x\n",
+					.key = "Acked-by",
+					.val = "x",
+				},
+				{
+					.raw = "Reviewed-by: x\n",
+					.key = "Reviewed-by",
+					.val = "x",
+				},
+				{
+					.raw = "Signed-off-by: x\n",
+					.key = "Signed-off-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"with body text, without divider (second trailer block)",
@@ -85,7 +167,22 @@ static void run_t_trailer_iterator(void)
 			 */
 			"Helped-by: x\n"
 			"Signed-off-by: x\n",
-			2
+			2,
+			{
+				{
+					.raw = "Helped-by: x\n",
+					.key = "Helped-by",
+					.val = "x",
+				},
+				{
+					.raw = "Signed-off-by: x\n",
+					.key = "Signed-off-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"with body text, with divider",
@@ -103,7 +200,17 @@ static void run_t_trailer_iterator(void)
 			 * always ignores the divider.
 			 */
 			"Signed-off-by: x\n",
-			1
+			1,
+			{
+				{
+					.raw = "Signed-off-by: x\n",
+					.key = "Signed-off-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"with non-trailer lines in trailer block",
@@ -125,7 +232,32 @@ static void run_t_trailer_iterator(void)
 			 * because we still want to iterate through the entire
 			 * block.
 			 */
-			4
+			4,
+			{
+				{
+					.raw = "not a trailer line\n",
+					.key = "not a trailer line",
+					.val = "",
+				},
+				{
+					.raw = "not a trailer line\n",
+					.key = "not a trailer line",
+					.val = "",
+				},
+				{
+					.raw = "not a trailer line\n",
+					.key = "not a trailer line",
+					.val = "",
+				},
+				{
+					.raw = "Signed-off-by: x\n",
+					.key = "Signed-off-by",
+					.val = "x",
+				},
+				{
+					0
+				},
+			},
 		},
 		{
 			"with non-trailer lines (one too many) in trailer block",
@@ -140,7 +272,8 @@ static void run_t_trailer_iterator(void)
 			"not a trailer line\n"
 			"not a trailer line\n"
 			"Signed-off-by: x\n",
-			0
+			0,
+			{{0}},
 		},
 		{
 			"with non-trailer lines (only 1) in trailer block, but no Git-generated trailers",
@@ -162,13 +295,15 @@ static void run_t_trailer_iterator(void)
 			"Acked-by: x\n"
 			"Acked-by: x\n"
 			"not a trailer line\n",
-			0
+			0,
+			{{0}},
 		},
 	};
 
 	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
 		TEST(t_trailer_iterator(tc[i].msg,
-					tc[i].num_expected),
+					tc[i].num_expected,
+					tc[i].contents),
 		     "%s", tc[i].name);
 	}
 }
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* Re: [PATCH v4 02/10] trailer: add unit tests for trailer iterator
  2024-05-02  4:54       ` [PATCH v4 02/10] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
@ 2024-05-02 16:54         ` Junio C Hamano
  0 siblings, 0 replies; 67+ messages in thread
From: Junio C Hamano @ 2024-05-02 16:54 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Linus Arver <linus@ucla.edu>
>
> Test the number of trailers found by the iterator (to be more precise,
> the parsing mechanism which the iterator just walks over) when given
> some some arbitrary log message.

"some some", which I think you (or perhaps I while queuing) fixed in
the previous round.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v4 00/10] Make trailer_info struct private (plus sequencer cleanup)
  2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
                         ` (9 preceding siblings ...)
  2024-05-02  4:54       ` [PATCH v4 10/10] trailer unit tests: inspect iterator contents Linus Arver via GitGitGadget
@ 2024-05-02 17:15       ` Junio C Hamano
  10 siblings, 0 replies; 67+ messages in thread
From: Junio C Hamano @ 2024-05-02 17:15 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Christian Couder, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> NOTE: This series is based on the la/format-trailer-info topic branch (see
> its discussion at [1]).

I took Christian's "I ... had only a few minor comments on some
patches." [*] to imply that everything else without comments was
good, so hopefully this is good to go.  I've amended "some some" in
[2/10] while queueing myself.

Let me mark it for 'next' unless there is some objection in a few
days.

Thanks.


[Reference]

 * https://lore.kernel.org/git/CAP8UFD3Vzs2KvsE2qmx_54UX9ktAngr2FkpKfWVjRMD=t1B7Wg@mail.gmail.com/

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v4 03/10] trailer: teach iterator about non-trailer lines
  2024-05-02  4:54       ` [PATCH v4 03/10] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
@ 2024-05-04 15:33         ` Phillip Wood
  2024-05-05  1:37           ` Linus Arver
  0 siblings, 1 reply; 67+ messages in thread
From: Phillip Wood @ 2024-05-04 15:33 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget, git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver, Linus Arver

Hi Linus

Sorry I'm late to the party here. I've left a couple of thoughts below,
but I don't want to derail this series if everyone else is happy.

On 02/05/2024 05:54, Linus Arver via GitGitGadget wrote:
> From: Linus Arver <linus@ucla.edu>
> 
> Previously the iterator did not iterate over non-trailer lines. This was
> somewhat unfortunate, because trailer blocks could have non-trailer
> lines in them since 146245063e (trailer: allow non-trailers in trailer
> block, 2016-10-21), which was before the iterator was created in
> f0939a0eb1 (trailer: add interface for iterating over commit trailers,
> 2020-09-27).
> 
> So if trailer API users wanted to iterate over all lines in a trailer
> block (including non-trailer lines), they could not use the iterator and
> were forced to use the lower-level trailer_info struct directly (which
> provides a raw string array that includes all lines in the trailer
> block).
> 
> Change the iterator's behavior so that we also iterate over non-trailer
> lines, instead of skipping over them. The new "raw" member of the
> iterator allows API users to access previously inaccessible non-trailer
> lines. Reword the variable "trailer" to just "line" because this
> variable can now hold both trailer lines _and_ non-trailer lines.
> 
> The new "raw" member is important because anyone currently not using the
> iterator is using trailer_info's raw string array directly to access
> lines to check what the combined key + value looks like. If we didn't
> provide a "raw" member here, iterator users would have to re-construct
> the unparsed line by concatenating the key and value back together again
> --- which places an undue burden for iterator users.

Comparing the raw line is error prone as it ignores custom separators 
and variations in the amount of space between the key and the value. 
Therefore I'd argue that the sequencer should in fact be comparing the 
trailer key and value separately rather than comparing the whole line. 
There is an issue that we want to add a new Signed-off-by: trailer for 
"C.O. Mitter" when the trailers look like

	Signed-off-by: C.O. Mitter <c.o.mitter@example.com>
	non-trailer-line

but not when they look like

	Signed-off-by: C.O. Mitter <c.o.mitter@example.com>

so we still need some way of indicating that there was a non-trailer 
line after the last trailer though.

> The next commit demonstrates the use of the iterator in sequencer.c as an
> example of where "raw" will be useful, so that it can start using the
> iterator.
> 
> For the existing use of the iterator in builtin/shortlog.c, we don't
> have to change the code there because that code does

An interface that lets the caller pass a flag if they want to know about 
non-trailer lines might be easier to use for the callers that don't want 
to worry about such lines and wouldn't need a justification as to why it 
was safe for existing callers.

Best Wishes

Phillip

>      trailer_iterator_init(&iter, body);
>      while (trailer_iterator_advance(&iter)) {
>          const char *value = iter.val.buf;
> 
>          if (!string_list_has_string(&log->trailers, iter.key.buf))
>              continue;
> 
>          ...
> 
> and the
> 
>          if (!string_list_has_string(&log->trailers, iter.key.buf))
> 
> condition already skips over non-trailer lines (for a non-trailer line,
> iter.key.buf holds the entire line, which is not expected to match any
> of the trailer keys in log->trailers, so the comparison still works even
> with this commit).
> 
> Rename "num_expected_trailers" to "num_expected" in
> t/unit-tests/t-trailer.c because the items we iterate over now include
> non-trailer lines.
> 
> Signed-off-by: Linus Arver <linus@ucla.edu>
> ---
>   t/unit-tests/t-trailer.c | 16 +++++++++++-----
>   trailer.c                | 12 +++++-------
>   trailer.h                |  7 +++++++
>   3 files changed, 23 insertions(+), 12 deletions(-)
> 
> diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c
> index c1f897235c7..4f640d2a4b8 100644
> --- a/t/unit-tests/t-trailer.c
> +++ b/t/unit-tests/t-trailer.c
> @@ -1,7 +1,7 @@
>   #include "test-lib.h"
>   #include "trailer.h"
>   
> -static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
> +static void t_trailer_iterator(const char *msg, size_t num_expected)
>   {
>   	struct trailer_iterator iter;
>   	size_t i = 0;
> @@ -11,7 +11,7 @@ static void t_trailer_iterator(const char *msg, size_t num_expected_trailers)
>   		i++;
>   	trailer_iterator_release(&iter);
>   
> -	check_uint(i, ==, num_expected_trailers);
> +	check_uint(i, ==, num_expected);
>   }
>   
>   static void run_t_trailer_iterator(void)
> @@ -19,7 +19,7 @@ static void run_t_trailer_iterator(void)
>   	static struct test_cases {
>   		const char *name;
>   		const char *msg;
> -		size_t num_expected_trailers;
> +		size_t num_expected;
>   	} tc[] = {
>   		{
>   			"empty input",
> @@ -119,7 +119,13 @@ static void run_t_trailer_iterator(void)
>   			"not a trailer line\n"
>   			"not a trailer line\n"
>   			"Signed-off-by: x\n",
> -			1
> +			/*
> +			 * Even though there is only really 1 real "trailer"
> +			 * (Signed-off-by), we still have 4 trailer objects
> +			 * because we still want to iterate through the entire
> +			 * block.
> +			 */
> +			4
>   		},
>   		{
>   			"with non-trailer lines (one too many) in trailer block",
> @@ -162,7 +168,7 @@ static void run_t_trailer_iterator(void)
>   
>   	for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) {
>   		TEST(t_trailer_iterator(tc[i].msg,
> -					tc[i].num_expected_trailers),
> +					tc[i].num_expected),
>   		     "%s", tc[i].name);
>   	}
>   }
> diff --git a/trailer.c b/trailer.c
> index 3e4dab9c065..4700c441442 100644
> --- a/trailer.c
> +++ b/trailer.c
> @@ -1146,17 +1146,15 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
>   
>   int trailer_iterator_advance(struct trailer_iterator *iter)
>   {
> -	while (iter->internal.cur < iter->internal.info.trailer_nr) {
> -		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
> -		int separator_pos = find_separator(trailer, separators);
> -
> -		if (separator_pos < 1)
> -			continue; /* not a real trailer */
> +	if (iter->internal.cur < iter->internal.info.trailer_nr) {
> +		char *line = iter->internal.info.trailers[iter->internal.cur++];
> +		int separator_pos = find_separator(line, separators);
>   
> +		iter->raw = line;
>   		strbuf_reset(&iter->key);
>   		strbuf_reset(&iter->val);
>   		parse_trailer(&iter->key, &iter->val, NULL,
> -			      trailer, separator_pos);
> +			      line, separator_pos);
>   		/* Always unfold values during iteration. */
>   		unfold_value(&iter->val);
>   		return 1;
> diff --git a/trailer.h b/trailer.h
> index 9f42aa75994..7e36da7d13c 100644
> --- a/trailer.h
> +++ b/trailer.h
> @@ -125,6 +125,13 @@ void format_trailers_from_commit(const struct process_trailer_options *,
>    *   trailer_iterator_release(&iter);
>    */
>   struct trailer_iterator {
> +	/*
> +	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
> +	 * key/val pair as part of a trailer block (as the "key" and "val"
> +	 * fields below). If a line fails to parse as a trailer, then the "key"
> +	 * will be the entire line and "val" will be the empty string.
> +	 */
> +	const char *raw;
>   	struct strbuf key;
>   	struct strbuf val;
>   

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v4 03/10] trailer: teach iterator about non-trailer lines
  2024-05-04 15:33         ` Phillip Wood
@ 2024-05-05  1:37           ` Linus Arver
  2024-05-05 14:09             ` Phillip Wood
  0 siblings, 1 reply; 67+ messages in thread
From: Linus Arver @ 2024-05-05  1:37 UTC (permalink / raw)
  To: phillip.wood, Linus Arver via GitGitGadget, git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk,
	Linus Arver

Phillip Wood <phillip.wood123@gmail.com> writes:

> Hi Linus
>
> Sorry I'm late to the party here I've left a couple of thoughts below
> but I don't want to derail this series if everyone else is happy.

Hi Phillip, no problem.

> On 02/05/2024 05:54, Linus Arver via GitGitGadget wrote:
>> From: Linus Arver <linus@ucla.edu>
>>
>> Previously the iterator did not iterate over non-trailer lines. This was
>> somewhat unfortunate, because trailer blocks could have non-trailer
>> lines in them since 146245063e (trailer: allow non-trailers in trailer
>> block, 2016-10-21), which was before the iterator was created in
>> f0939a0eb1 (trailer: add interface for iterating over commit trailers,
>> 2020-09-27).
>>
>> So if trailer API users wanted to iterate over all lines in a trailer
>> block (including non-trailer lines), they could not use the iterator and
>> were forced to use the lower-level trailer_info struct directly (which
>> provides a raw string array that includes all lines in the trailer
>> block).
>>
>> Change the iterator's behavior so that we also iterate over non-trailer
>> lines, instead of skipping over them. The new "raw" member of the
>> iterator allows API users to access previously inaccessible non-trailer
>> lines. Reword the variable "trailer" to just "line" because this
>> variable can now hold both trailer lines _and_ non-trailer lines.
>>
>> The new "raw" member is important because anyone currently not using the
>> iterator is using trailer_info's raw string array directly to access
>> lines to check what the combined key + value looks like. If we didn't
>> provide a "raw" member here, iterator users would have to re-construct
>> the unparsed line by concatenating the key and value back together again
>> --- which places an undue burden for iterator users.
>
> Comparing the raw line is error prone as it ignores custom separators
> and variations in the amount of space between the key and the value.
> Therefore I'd argue that the sequencer should in fact be comparing the
> trailer key and value separately rather than comparing the whole line.

I agree, but that is likely beyond the scope of this series as the
behavior of comparing the whole line was preserved (not introduced) by
this series.

For reference, the "Signed-off-by: " is hardcoded in "sign_off_header"
in sequencer.c, and it is again hardcoded in "git_generated_prefixes" in
trailer.c. We always use the hardcoded key and colon ":" separator in a
few areas, so changing the code to be more precise to check for only the
key (to account for variability in the separator and space around it as
you pointed out) would be a more involved change (I think many tests
would need to be updated).

> There is an issue that we want to add a new Signed-off-by: trailer for
> "C.O. Mitter" when the trailers look like
>
> 	Signed-off-by: C.O. Mitter <c.o.mitter@example.com>
> 	non-trailer-line
>
> but not when they look like
>
> 	Signed-off-by: C.O. Mitter <c.o.mitter@example.com>
>
> so we still need some way of indicating that there was a non-trailer
> line after the last trailer though.

What is the issue, exactly? Also can you clarify if the issue is
introduced by this series (did you spot a regression)?

>> The next commit demonstrates the use of the iterator in sequencer.c as an
>> example of where "raw" will be useful, so that it can start using the
>> iterator.
>>
>> For the existing use of the iterator in builtin/shortlog.c, we don't
>> have to change the code there because that code does
>
> An interface that lets the caller pass a flag if they want to know about
> non-trailer lines might be easier to use for the callers that don't want
> to worry about such lines and wouldn't need a justification as to why it
> was safe for existing callers.

Makes sense. But perhaps such API enhancements belong in a future
series, when other callers that need such flexibility could benefit from
it?

> Best Wishes
>
> Phillip
>
>>      trailer_iterator_init(&iter, body);
>>      while (trailer_iterator_advance(&iter)) {
>>          const char *value = iter.val.buf;
>>
>>          if (!string_list_has_string(&log->trailers, iter.key.buf))
>>              continue;
>>
>>          ...
>>
>> and the
>>
>>          if (!string_list_has_string(&log->trailers, iter.key.buf))
>>
>> condition already skips over non-trailer lines (for a non-trailer line,
>> iter.key.buf holds the entire line, which is not expected to match any
>> of the trailer keys in log->trailers, so the comparison still works even
>> with this commit).
>>
>> Rename "num_expected_trailers" to "num_expected" in
>> t/unit-tests/t-trailer.c because the items we iterate over now include
>> non-trailer lines.
>>
>> Signed-off-by: Linus Arver <linus@ucla.edu>
>> [...]

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH v4 03/10] trailer: teach iterator about non-trailer lines
  2024-05-05  1:37           ` Linus Arver
@ 2024-05-05 14:09             ` Phillip Wood
  0 siblings, 0 replies; 67+ messages in thread
From: Phillip Wood @ 2024-05-05 14:09 UTC (permalink / raw)
  To: Linus Arver, phillip.wood, Linus Arver via GitGitGadget, git
  Cc: Christian Couder, Junio C Hamano, Emily Shaffer, Josh Steadmon,
	Randall S. Becker, Christian Couder, Kristoffer Haugsbakk

Hi Linus

On 05/05/2024 02:37, Linus Arver wrote:
> Phillip Wood <phillip.wood123@gmail.com> writes:
>> On 02/05/2024 05:54, Linus Arver via GitGitGadget wrote:
>>> From: Linus Arver <linus@ucla.edu>
>>>
>>> The new "raw" member is important because anyone currently not using the
>>> iterator is using trailer_info's raw string array directly to access
>>> lines to check what the combined key + value looks like. If we didn't
>>> provide a "raw" member here, iterator users would have to re-construct
>>> the unparsed line by concatenating the key and value back together again
>>> --- which places an undue burden on iterator users.
>>
>> Comparing the raw line is error prone as it ignores custom separators
>> and variations in the amount of space between the key and the value.
>> Therefore I'd argue that the sequencer should in fact be comparing the
>> trailer key and value separately rather than comparing the whole line.
> 
> I agree, but that is likely beyond the scope of this series as the
> behavior of comparing the whole line was preserved (not introduced) by
> this series.

Right but this series is changing the trailer iterator api to 
accommodate the sub-optimal sequencer code. My thought was that if the 
sequencer did the right thing we wouldn't need to expose the raw line in 
the iterator in the first place.

> For reference, the "Signed-off-by: " is hardcoded in "sign_off_header"
> in sequencer.c, and it is again hardcoded in "git_generated_prefixes" in
> trailer.c. We always use the hardcoded key and colon ":" separator in a
> few areas, so changing the code to be more precise to check for only the
> key (to account for variability in the separator and space around it as
> you pointed out) would be a more involved change (I think many tests
> would need to be updated).

So the worry is that we'd create a "Signed-off-by: " trailer that we 
then couldn't parse because the user didn't have ':' in trailer.separators?

>> There is an issue that we want to add a new Signed-off-by: trailer for
>> "C.O. Mitter" when the trailers look like
>>
>> 	Signed-off-by: C.O. Mitter <c.o.mitter@example.com>
>> 	non-trailer-line
>>
>> but not when they look like
>>
>> 	Signed-off-by: C.O. Mitter <c.o.mitter@example.com>
>>
>> so we still need some way of indicating that there was a non-trailer
>> line after the last trailer though.
> 
> What is the issue, exactly? Also can you clarify if the issue is
> introduced by this series (did you spot a regression)?

There is no regression - the issue is with my suggestion. We only want 
to add an SOB trailer if the last trailer does not match the SOB we're 
adding. If we were to use the existing trailer iterator api in the 
sequencer we would not know that we should add an SOB in the first 
example above as we'd only see the last trailer which matches the SOB 
we're trying to add. We'd still need some way to tell the caller that 
there was a non-trailer line following the last trailer.

>>> The next commit demonstrates the use of the iterator in sequencer.c as an
>>> example of where "raw" will be useful, so that it can start using the
>>> iterator.
>>>
>>> For the existing use of the iterator in builtin/shortlog.c, we don't
>>> have to change the code there because that code does
>>
>> An interface that lets the caller pass a flag if they want to know about
>> non-trailer lines might be easier to use for the callers that don't want
>> to worry about such lines and wouldn't need a justification as to why it
>> was safe for existing callers.
> 
> Makes sense. But perhaps such API enhancements belong in a future
> series, when other callers that need such flexibility could benefit from
> it?

For me the main benefit would be that you don't have to spend time 
explaining why the changes are safe for existing callers because they 
would keep the existing iterator behavior.

Best Wishes

Phillip

^ permalink raw reply	[flat|nested] 67+ messages in thread

end of thread, other threads:[~2024-05-05 14:09 UTC | newest]

Thread overview: 67+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-16  6:27 [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Linus Arver via GitGitGadget
2024-03-16  6:27 ` [PATCH 1/6] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
2024-03-16  6:27 ` [PATCH 2/6] sequencer: use the trailer iterator Linus Arver via GitGitGadget
2024-03-16  6:27 ` [PATCH 3/6] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
2024-03-16  6:27 ` [PATCH 4/6] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
2024-03-16  6:27 ` [PATCH 5/6] trailer: make trailer_info struct private Linus Arver via GitGitGadget
2024-03-16  6:27 ` [PATCH 6/6] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
2024-03-16 17:06 ` [PATCH 0/6] Make trailer_info struct private (plus sequencer cleanup) Junio C Hamano
2024-03-26 22:00 ` Junio C Hamano
2024-04-19  5:36   ` Linus Arver
2024-04-19  5:22 ` [PATCH v2 0/8] " Linus Arver via GitGitGadget
2024-04-19  5:22   ` [PATCH v2 1/8] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
2024-04-19  5:22   ` [PATCH v2 2/8] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
2024-04-19  5:33     ` Linus Arver
2024-04-19 18:46     ` Linus Arver
2024-04-19 21:52     ` Junio C Hamano
2024-04-20  0:14       ` Linus Arver
2024-04-19  5:22   ` [PATCH v2 3/8] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
2024-04-19  5:22   ` [PATCH v2 4/8] sequencer: use the trailer iterator Linus Arver via GitGitGadget
2024-04-23 21:19     ` Junio C Hamano
2024-04-19  5:22   ` [PATCH v2 5/8] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
2024-04-19  5:22   ` [PATCH v2 6/8] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
2024-04-23 23:17     ` Junio C Hamano
2024-04-19  5:22   ` [PATCH v2 7/8] trailer: make trailer_info struct private Linus Arver via GitGitGadget
2024-04-23 23:27     ` Junio C Hamano
2024-04-25  3:17       ` Linus Arver
2024-04-19  5:22   ` [PATCH v2 8/8] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
2024-04-23 23:27     ` Junio C Hamano
2024-04-24  0:27   ` [PATCH v2 0/8] Make trailer_info struct private (plus sequencer cleanup) Junio C Hamano
2024-04-26  0:26   ` [PATCH v3 00/10] " Linus Arver via GitGitGadget
2024-04-26  0:26     ` [PATCH v3 01/10] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
2024-04-26  0:26     ` [PATCH v3 02/10] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
2024-04-26 14:51       ` Christian Couder
2024-04-26 16:20         ` Junio C Hamano
2024-04-26 16:25         ` Linus Arver
2024-04-26  0:26     ` [PATCH v3 03/10] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
2024-04-27 12:50       ` Christian Couder
2024-04-30  4:42         ` Linus Arver
2024-04-30  4:55           ` Linus Arver
2024-04-26  0:26     ` [PATCH v3 04/10] sequencer: use the trailer iterator Linus Arver via GitGitGadget
2024-04-26  0:26     ` [PATCH v3 05/10] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
2024-04-26  0:26     ` [PATCH v3 06/10] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
2024-04-26  0:26     ` [PATCH v3 07/10] trailer: make trailer_info struct private Linus Arver via GitGitGadget
2024-04-26  0:26     ` [PATCH v3 08/10] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
2024-04-26  0:26     ` [PATCH v3 09/10] trailer: document parse_trailers() usage Linus Arver via GitGitGadget
2024-04-26  0:26     ` [PATCH v3 10/10] trailer unit tests: inspect iterator contents Linus Arver via GitGitGadget
2024-04-27 12:51     ` [PATCH v3 00/10] Make trailer_info struct private (plus sequencer cleanup) Christian Couder
2024-05-02  4:54     ` [PATCH v4 " Linus Arver via GitGitGadget
2024-05-02  4:54       ` [PATCH v4 01/10] Makefile: sort UNIT_TEST_PROGRAMS Linus Arver via GitGitGadget
2024-05-02  4:54       ` [PATCH v4 02/10] trailer: add unit tests for trailer iterator Linus Arver via GitGitGadget
2024-05-02 16:54         ` Junio C Hamano
2024-05-02  4:54       ` [PATCH v4 03/10] trailer: teach iterator about non-trailer lines Linus Arver via GitGitGadget
2024-05-04 15:33         ` Phillip Wood
2024-05-05  1:37           ` Linus Arver
2024-05-05 14:09             ` Phillip Wood
2024-05-02  4:54       ` [PATCH v4 04/10] sequencer: use the trailer iterator Linus Arver via GitGitGadget
2024-05-02  4:54       ` [PATCH v4 05/10] interpret-trailers: access trailer_info with new helpers Linus Arver via GitGitGadget
2024-05-02  4:54       ` [PATCH v4 06/10] trailer: make parse_trailers() return trailer_info pointer Linus Arver via GitGitGadget
2024-05-02  4:54       ` [PATCH v4 07/10] trailer: make trailer_info struct private Linus Arver via GitGitGadget
2024-05-02  4:54       ` [PATCH v4 08/10] trailer: retire trailer_info_get() from API Linus Arver via GitGitGadget
2024-05-02  4:54       ` [PATCH v4 09/10] trailer: document parse_trailers() usage Linus Arver via GitGitGadget
2024-05-02  4:54       ` [PATCH v4 10/10] trailer unit tests: inspect iterator contents Linus Arver via GitGitGadget
2024-05-02 17:15       ` [PATCH v4 00/10] Make trailer_info struct private (plus sequencer cleanup) Junio C Hamano
  -- strict thread matches above, loose matches on Subject: below --
2024-01-26 22:38 [PATCH v2 00/10] Enrich Trailer API Linus Arver via GitGitGadget
2024-01-31  1:22 ` [PATCH v3 " Linus Arver via GitGitGadget
2024-01-31  1:22   ` [PATCH v3 04/10] sequencer: use the trailer iterator Linus Arver via GitGitGadget
2024-02-01 18:06     ` Junio C Hamano
2024-02-01 19:14       ` Linus Arver
2024-02-03  0:39         ` Linus Arver

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).