Git Mailing List Archive mirror
 help / color / mirror / code / Atom feed
* [PATCH 0/1] git-grep: improve the --show-function behaviour
@ 2023-09-11 12:11 Oleg Nesterov
  2023-09-11 12:12 ` [PATCH 1/1] " Oleg Nesterov
  0 siblings, 1 reply; 15+ messages in thread
From: Oleg Nesterov @ 2023-09-11 12:11 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Ævar Arnfjörð Bjarmason, Calvin Wan,
	Carlo Marcelo Arenas Belón, Elijah Newren, Jeff King,
	Linus Torvalds, Mathias Krause, René Scharfe, Taylor Blau,
	git

Hello,

I have never looked into the git sources before, so I am spamming
a lot of people found in git-log grep.c

Yesterday I tried to teach my text editor to use "git grep -p" and
I noticed that it doesn't work as I'd expect. The changelog has a
simple test-case, but let me also provide "real life" example.

When I do

	:git-grep -pw pid kernel/sys.c

in my editor before this patch, I get

	kernel/sys.c              224 sys_setpriority          struct pid *pgrp;
	kernel/sys.c              294 sys_getpriority          struct pid *pgrp;
	kernel/sys.c              952                          * Note, despite the name, this returns the tgid not the pid.  The tgid and
	kernel/sys.c              953                          * the pid are identical unless CLONE_THREAD was specified on clone() in
	kernel/sys.c              963                          /* Thread ID - the internal kernel "pid" */
	kernel/sys.c              977 sys_getppid              int pid;
	kernel/sys.c              980 sys_getppid              pid = task_tgid_vnr(rcu_dereference(current->real_parent));
	kernel/sys.c              983 sys_getppid              return pid;
	kernel/sys.c             1073                          SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
	kernel/sys.c             1077 sys_times                struct pid *pgrp;
	kernel/sys.c             1080 sys_times                if (!pid)
	kernel/sys.c             1081 sys_times                pid = task_pid_vnr(group_leader);
	kernel/sys.c             1083 sys_times                pgid = pid;
	kernel/sys.c             1094 sys_times                p = find_task_by_vpid(pid);
	kernel/sys.c             1120 sys_times                if (pgid != pid) {
	kernel/sys.c             1144                          static int do_getpgid(pid_t pid)
	kernel/sys.c             1147 sys_times                struct pid *grp;
	kernel/sys.c             1151 sys_times                if (!pid)
	kernel/sys.c             1155 sys_times                p = find_task_by_vpid(pid);
	kernel/sys.c             1172                          SYSCALL_DEFINE1(getpgid, pid_t, pid)
	kernel/sys.c             1174 sys_times                return do_getpgid(pid);
	kernel/sys.c             1186                          SYSCALL_DEFINE1(getsid, pid_t, pid)
	kernel/sys.c             1189 sys_getpgrp              struct pid *sid;
	kernel/sys.c             1193 sys_getpgrp              if (!pid)
	kernel/sys.c             1197 sys_getpgrp              p = find_task_by_vpid(pid);
	kernel/sys.c             1214                          static void set_special_pids(struct pid *pid)
	kernel/sys.c             1218 sys_getpgrp              if (task_session(curr) != pid)
	kernel/sys.c             1219 sys_getpgrp              change_pid(curr, PIDTYPE_SID, pid);
	kernel/sys.c             1221 sys_getpgrp              if (task_pgrp(curr) != pid)
	kernel/sys.c             1222 sys_getpgrp              change_pid(curr, PIDTYPE_PGID, pid);
	kernel/sys.c             1228 ksys_setsid              struct pid *sid = task_pid(group_leader);
	kernel/sys.c             1684                          SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
	kernel/sys.c             1705 check_prlimit_permission tsk = pid ? find_task_by_vpid(pid) : current;

And only the first 5 funcnames are correct.

After this patch I get

	kernel/sys.c              224 sys_setpriority          struct pid *pgrp;
	kernel/sys.c              294 sys_getpriority          struct pid *pgrp;
	kernel/sys.c              952                          * Note, despite the name, this returns the tgid not the pid.  The tgid and
	kernel/sys.c              953                          * the pid are identical unless CLONE_THREAD was specified on clone() in
	kernel/sys.c              963                          /* Thread ID - the internal kernel "pid" */
	kernel/sys.c              977 sys_getppid              int pid;
	kernel/sys.c              980 sys_getppid              pid = task_tgid_vnr(rcu_dereference(current->real_parent));
	kernel/sys.c              983 sys_getppid              return pid;
	kernel/sys.c             1073                          SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
	kernel/sys.c             1077 sys_setpgid              struct pid *pgrp;
	kernel/sys.c             1080 sys_setpgid              if (!pid)
	kernel/sys.c             1081 sys_setpgid              pid = task_pid_vnr(group_leader);
	kernel/sys.c             1083 sys_setpgid              pgid = pid;
	kernel/sys.c             1094 sys_setpgid              p = find_task_by_vpid(pid);
	kernel/sys.c             1120 sys_setpgid              if (pgid != pid) {
	kernel/sys.c             1144                          static int do_getpgid(pid_t pid)
	kernel/sys.c             1147 do_getpgid               struct pid *grp;
	kernel/sys.c             1151 do_getpgid               if (!pid)
	kernel/sys.c             1155 do_getpgid               p = find_task_by_vpid(pid);
	kernel/sys.c             1172                          SYSCALL_DEFINE1(getpgid, pid_t, pid)
	kernel/sys.c             1174 sys_getpgid              return do_getpgid(pid);
	kernel/sys.c             1186                          SYSCALL_DEFINE1(getsid, pid_t, pid)
	kernel/sys.c             1189 sys_getsid               struct pid *sid;
	kernel/sys.c             1193 sys_getsid               if (!pid)
	kernel/sys.c             1197 sys_getsid               p = find_task_by_vpid(pid);
	kernel/sys.c             1214                          static void set_special_pids(struct pid *pid)
	kernel/sys.c             1218 set_special_pids         if (task_session(curr) != pid)
	kernel/sys.c             1219 set_special_pids         change_pid(curr, PIDTYPE_SID, pid);
	kernel/sys.c             1221 set_special_pids         if (task_pgrp(curr) != pid)
	kernel/sys.c             1222 set_special_pids         change_pid(curr, PIDTYPE_PGID, pid);
	kernel/sys.c             1228 ksys_setsid              struct pid *sid = task_pid(group_leader);
	kernel/sys.c             1684                          SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
	kernel/sys.c             1705 sys_prlimit64            tsk = pid ? find_task_by_vpid(pid) : current;

and everythig looks correct.

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-11 12:11 [PATCH 0/1] git-grep: improve the --show-function behaviour Oleg Nesterov
@ 2023-09-11 12:12 ` Oleg Nesterov
  2023-09-11 20:11   ` René Scharfe
  2023-09-11 22:34   ` Junio C Hamano
  0 siblings, 2 replies; 15+ messages in thread
From: Oleg Nesterov @ 2023-09-11 12:12 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Ævar Arnfjörð Bjarmason, Calvin Wan,
	Carlo Marcelo Arenas Belón, Elijah Newren, Jeff King,
	Linus Torvalds, Mathias Krause, René Scharfe, Taylor Blau,
	git

show_funcname_line() returns when "lno <= opt->last_shown" and this
is not right in that the ->last_shown line (which matched the pattern)
can also have the actual function name we need to report.

Change this code to check "lno < opt->last_shown". While at it, move
this check up to avoid the unnecessary "find the previous bol" loop.

Note that --lno can't underflow, lno==0 is not possible in this loop.

Simple test-case:

	$ cat TEST.c
	void func(void);

	void func1(xxx)
	{
		use1(xxx);
	}

	void func2(xxx)
	{
		use2(xxx);
	}

	$ git grep --untracked -pn xxx TEST.c

before the patch:

	TEST.c=1=void func(void);
	TEST.c:3:void func1(xxx)
	TEST.c:5:       use1(xxx);
	TEST.c:8:void func2(xxx)
	TEST.c:10:      use2(xxx);

after the patch:

	TEST.c=1=void func(void);
	TEST.c:3:void func1(xxx)
	TEST.c=3=void func1(xxx)
	TEST.c:5:       use1(xxx);
	TEST.c:8:void func2(xxx)
	TEST.c=8=void func2(xxx)
	TEST.c:10:      use2(xxx);

which looks much better to me.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 grep.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/grep.c b/grep.c
index 0904d55b24..7cad8352f4 100644
--- a/grep.c
+++ b/grep.c
@@ -1350,12 +1350,11 @@ static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
 	while (bol > gs->buf) {
 		const char *eol = --bol;
 
+		if (--lno < opt->last_shown)
+			break;
+
 		while (bol > gs->buf && bol[-1] != '\n')
 			bol--;
-		lno--;
-
-		if (lno <= opt->last_shown)
-			break;
 
 		if (match_funcname(opt, gs, bol, eol)) {
 			show_line(opt, bol, eol, gs->name, lno, 0, '=');
-- 
2.25.1.362.g51ebf55



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-11 12:12 ` [PATCH 1/1] " Oleg Nesterov
@ 2023-09-11 20:11   ` René Scharfe
  2023-09-11 21:54     ` Oleg Nesterov
  2023-09-11 22:34   ` Junio C Hamano
  1 sibling, 1 reply; 15+ messages in thread
From: René Scharfe @ 2023-09-11 20:11 UTC (permalink / raw)
  To: Oleg Nesterov, Junio C Hamano
  Cc: Ævar Arnfjörð Bjarmason, Calvin Wan,
	Carlo Marcelo Arenas Belón, Elijah Newren, Jeff King,
	Linus Torvalds, Mathias Krause, Taylor Blau, git

Am 11.09.23 um 14:12 schrieb Oleg Nesterov:
> show_funcname_line() returns when "lno <= opt->last_shown" and this
> is not right in that the ->last_shown line (which matched the pattern)
> can also have the actual function name we need to report.
>
> Change this code to check "lno < opt->last_shown". While at it, move
> this check up to avoid the unnecessary "find the previous bol" loop.
>
> Note that --lno can't underflow, lno==0 is not possible in this loop.
>
> Simple test-case:
>
> 	$ cat TEST.c
> 	void func(void);
>
> 	void func1(xxx)
> 	{
> 		use1(xxx);
> 	}
>
> 	void func2(xxx)
> 	{
> 		use2(xxx);
> 	}
>
> 	$ git grep --untracked -pn xxx TEST.c
>
> before the patch:
>
> 	TEST.c=1=void func(void);
> 	TEST.c:3:void func1(xxx)
> 	TEST.c:5:       use1(xxx);
> 	TEST.c:8:void func2(xxx)
> 	TEST.c:10:      use2(xxx);
>
> after the patch:
>
> 	TEST.c=1=void func(void);
> 	TEST.c:3:void func1(xxx)
> 	TEST.c=3=void func1(xxx)
> 	TEST.c:5:       use1(xxx);
> 	TEST.c:8:void func2(xxx)
> 	TEST.c=8=void func2(xxx)
> 	TEST.c:10:      use2(xxx);
>
> which looks much better to me.

Interesting idea to treat function lines as annotations of matches
instead of as special context.  Showing lines twice feels wasteful, but
at least for -p it might be justifiable from that angle.  Wouldn't you
have to repeat function line 3 before the match in line 8, though?

The reason for not repeating a matched function line was that it
doesn't add much information under the assumption that it's easy to
identify function lines visually.  I assume this still holds, but
perhaps it doesn't for more complicated languages?

I have to admit that I almost never use --show-function, but instead
use the related --function-context a lot.  So my practical experience
with the former option is very limited.

>  grep.c | 7 +++----
>  1 file changed, 3 insertions(+), 4 deletions(-)

The patch would need to update Documentation/git-grep.txt as well to
reflect the changed output.

René

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-11 20:11   ` René Scharfe
@ 2023-09-11 21:54     ` Oleg Nesterov
  0 siblings, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2023-09-11 21:54 UTC (permalink / raw)
  To: René Scharfe
  Cc: Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Calvin Wan, Carlo Marcelo Arenas Belón, Elijah Newren,
	Jeff King, Linus Torvalds, Mathias Krause, Taylor Blau, git

Hi René,

Thanks for feedback. I am already sleeping but let me try to reply anyway,
even if I don't really understand you.

On 09/11, René Scharfe wrote:
>
> Am 11.09.23 um 14:12 schrieb Oleg Nesterov:
> > show_funcname_line() returns when "lno <= opt->last_shown" and this
> > is not right in that the ->last_shown line (which matched the pattern)
> > can also have the actual function name we need to report.
> >
> > Change this code to check "lno < opt->last_shown". While at it, move
> > this check up to avoid the unnecessary "find the previous bol" loop.
> >
> > Note that --lno can't underflow, lno==0 is not possible in this loop.
> >
> > Simple test-case:
> >
> > 	$ cat TEST.c
> > 	void func(void);
> >
> > 	void func1(xxx)
> > 	{
> > 		use1(xxx);
> > 	}
> >
> > 	void func2(xxx)
> > 	{
> > 		use2(xxx);
> > 	}
> >
> > 	$ git grep --untracked -pn xxx TEST.c
> >
> > before the patch:
> >
> > 	TEST.c=1=void func(void);
> > 	TEST.c:3:void func1(xxx)
> > 	TEST.c:5:       use1(xxx);
> > 	TEST.c:8:void func2(xxx)
> > 	TEST.c:10:      use2(xxx);
> >
> > after the patch:
> >
> > 	TEST.c=1=void func(void);
> > 	TEST.c:3:void func1(xxx)
> > 	TEST.c=3=void func1(xxx)
> > 	TEST.c:5:       use1(xxx);
> > 	TEST.c:8:void func2(xxx)
> > 	TEST.c=8=void func2(xxx)
> > 	TEST.c:10:      use2(xxx);
> >
> > which looks much better to me.
>
> Interesting idea to treat function lines as annotations of matches
> instead of as special context.

Sorry, I don't understand... Let me repeat I am not familiar with this
code and terminology. Could you spell please?

> Showing lines twice feels wasteful, but
> at least for -p it might be justifiable from that angle.

Just in case... say, "func1" is reported twice only when it is really
needed. From the "after the patch" output above:

	TEST.c:3:void func1(xxx)

this is what we already have without this patch

	TEST.c=3=void func1(xxx)

this is what we have with this patch because the next

	TEST.c:5:       use1(xxx);

line needs the proper funcname line, and without this patch it would be
"void func()" which has nothing to do with use1(xxx),

If I do, say,

	./git grep --untracked -pn func1 TEST.c

then (with or without this patch) the output is

	TEST.c=1=void func(void);
	TEST.c:3:void func1(xxx)

in this case there is no reason to report "=void func1(xxx)".


> Wouldn't you
> have to repeat function line 3 before the match in line 8, though?

Why?

> The reason for not repeating a matched function line was that it
> doesn't add much information under the assumption that it's easy to
> identify function lines visually.

But it is not. Lets look again at the "before the patch:" output above,

 	TEST.c=1=void func(void);
 	TEST.c:3:void func1(xxx)
 	TEST.c:5:       use1(xxx);
 	TEST.c:8:void func2(xxx)
 	TEST.c:10:      use2(xxx);

it looks as if every "xxx" match is inside the (unrelated) func().

OK, "visually" you can also notice the "void funcX(xxx)" declarations
and understand whats going on.

But a) I don't think this is always easy, and b) it is certainly not
easy when you use "git-grep -p" in scripts. Please see 0/1.

> The patch would need to update Documentation/git-grep.txt as well to
> reflect the changed output.

Hmm... From Documentation/git-grep.txt:

	-p::
	--show-function::
		Show the preceding line that contains the function name of
		the match, unless the matching line is a function name itself.
		...

this is still true after this patch. How do you think I should update this
section?

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-11 12:12 ` [PATCH 1/1] " Oleg Nesterov
  2023-09-11 20:11   ` René Scharfe
@ 2023-09-11 22:34   ` Junio C Hamano
  2023-09-11 23:17     ` Oleg Nesterov
  1 sibling, 1 reply; 15+ messages in thread
From: Junio C Hamano @ 2023-09-11 22:34 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Ævar Arnfjörð Bjarmason, Calvin Wan,
	Carlo Marcelo Arenas Belón, Elijah Newren, Jeff King,
	Linus Torvalds, Mathias Krause, René Scharfe, Taylor Blau,
	git

Oleg Nesterov <oleg@redhat.com> writes:

> 	$ git grep --untracked -pn xxx TEST.c
>
> before the patch:
>
> 	TEST.c=1=void func(void);
> 	TEST.c:3:void func1(xxx)
> 	TEST.c:5:       use1(xxx);
> 	TEST.c:8:void func2(xxx)
> 	TEST.c:10:      use2(xxx);
>
> after the patch:
>
> 	TEST.c=1=void func(void);
> 	TEST.c:3:void func1(xxx)
> 	TEST.c=3=void func1(xxx)
> 	TEST.c:5:       use1(xxx);
> 	TEST.c:8:void func2(xxx)
> 	TEST.c=8=void func2(xxx)
> 	TEST.c:10:      use2(xxx);
>
> which looks much better to me.

The "better" is often subjective.  The former is showing what is
going on in the TEST.c code very clearly without wasting valuable
vertical screen real estate, at least to me.  If we want to adopt
the proposed behaviour, which I would recommend against, the same
patch should update the documentation, which currently says

    Show the preceding line that contains the function name of the
    match, unless the matching line is a function name itself.

which has allowed the users to depend on the current behaviour for
practically forever since the feature was introduced by 2944e4e6
(grep: add option -p/--show-function, 2009-07-02).

As René said, I think -p/--show-function is a rather less used
option in modern Git where "--function-context", which back in
2944e4e6 did not exist, tend to be a much more useful option, so the
fallout from such a change may be small, but it still is a backward
incompatible behaviour change that needs to be handled with care.

Thanks.

> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
>  grep.c | 7 +++----
>  1 file changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/grep.c b/grep.c
> index 0904d55b24..7cad8352f4 100644
> --- a/grep.c
> +++ b/grep.c
> @@ -1350,12 +1350,11 @@ static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
>  	while (bol > gs->buf) {
>  		const char *eol = --bol;
>  
> +		if (--lno < opt->last_shown)
> +			break;
> +
>  		while (bol > gs->buf && bol[-1] != '\n')
>  			bol--;
> -		lno--;
> -
> -		if (lno <= opt->last_shown)
> -			break;
>  
>  		if (match_funcname(opt, gs, bol, eol)) {
>  			show_line(opt, bol, eol, gs->name, lno, 0, '=');

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-11 22:34   ` Junio C Hamano
@ 2023-09-11 23:17     ` Oleg Nesterov
  2023-09-12 13:04       ` Oleg Nesterov
  0 siblings, 1 reply; 15+ messages in thread
From: Oleg Nesterov @ 2023-09-11 23:17 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Ævar Arnfjörð Bjarmason, Calvin Wan,
	Carlo Marcelo Arenas Belón, Elijah Newren, Jeff King,
	Linus Torvalds, Mathias Krause, René Scharfe, Taylor Blau,
	git

On 09/11, Junio C Hamano wrote:
>
> Oleg Nesterov <oleg@redhat.com> writes:
>
> > 	$ git grep --untracked -pn xxx TEST.c
> >
> > before the patch:
> >
> > 	TEST.c=1=void func(void);
> > 	TEST.c:3:void func1(xxx)
> > 	TEST.c:5:       use1(xxx);
> > 	TEST.c:8:void func2(xxx)
> > 	TEST.c:10:      use2(xxx);
> >
> > after the patch:
> >
> > 	TEST.c=1=void func(void);
> > 	TEST.c:3:void func1(xxx)
> > 	TEST.c=3=void func1(xxx)
> > 	TEST.c:5:       use1(xxx);
> > 	TEST.c:8:void func2(xxx)
> > 	TEST.c=8=void func2(xxx)
> > 	TEST.c:10:      use2(xxx);
> >
> > which looks much better to me.
>
> The "better" is often subjective.

Sure. that is why I added "to me".

> The former is showing what is
> going on in the TEST.c code very clearly without wasting valuable
> vertical screen real estate, at least to me.

very clearly? As you probably understand this is subjective as well.
But yes, you too added "at least to me" ;)

However, certainly this is not true when you use git-grep in scripts,
please see 0/1.

> If we want to adopt
> the proposed behaviour, which I would recommend against, the same
> patch should update the documentation, which currently says
>
>     Show the preceding line that contains the function name of the
>     match, unless the matching line is a function name itself.

And I still don't think this patch changes the documented behaviour.
See my reply to Rene.

Again, if you do

	./git grep -pn --untracked func1 TEST.c

with this patch applied, the output is still

	TEST.c=1=void func(void);
	TEST.c:3:void func1(xxx)

which iiuc matches the documentation above.

Now,

	./git grep -pn --untracked xxx TEST.c

adds the additional

	TEST.c=3=void func1(xxx)
	...
	TEST.c=8=void func2(xxx)

but how does this contradict with the documentation above?

the matching lines are use1(xxx) and use2(xxx), there are NOT
"the matching line is a function name itself".

> As René said, I think -p/--show-function is a rather less used
> option in modern Git where "--function-context", which back in
> 2944e4e6 did not exist, tend to be a much more useful option,

Well, not to me. And you know, I am a git user too ;)

> but it still is a backward
> incompatible behaviour change that needs to be handled with care.

And this is what I still don't understand.

> Thanks.

Thanks,

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-11 23:17     ` Oleg Nesterov
@ 2023-09-12 13:04       ` Oleg Nesterov
  2023-09-12 13:51         ` Oleg Nesterov
  0 siblings, 1 reply; 15+ messages in thread
From: Oleg Nesterov @ 2023-09-12 13:04 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Ævar Arnfjörð Bjarmason, Calvin Wan,
	Carlo Marcelo Arenas Belón, Elijah Newren, Jeff King,
	Linus Torvalds, Mathias Krause, René Scharfe, Taylor Blau,
	git, Alexey Gladkov

René, Junio,

I don't like the fact we can't understand each other ;) Could you
please explain why do you think this patch should update the docs?

Please forget about my patch for the moment. Lets start from the very
beginning:

	-p::
	--show-function::
		Show the preceding line that contains the function name of
		the match, unless the matching line is a function name itself.

and in my opinion, it is the current behaviour that doesn't match the
documentation.

-------------------------------------------------------------------------

	$ cat TEST1.c
	void func1()
	{
	}
	void func2()
	{
	}

	$ git grep --untracked -pn func2 TEST1.c
	TEST1.c=1=void func1()
	TEST1.c:4:void func2()

in this case the matching line is "void func2()" and it is also a function
name itself, in this case git-grep should not show "=void func1()" which is
"the preceding line that contains the function name of the match.

But it does. So perhaps git-grep needs another change, something like

	if (match_funcname(opt, gs, bol, end_of_line(...)))
		return;

at the start of show_funcname_line(), but my patch does not change this
behaviour.

--------------------------------------------------------------------------

	$ cat TEST2.c
	void func(xxx)
	{
		use(xxx);
	}

	$ git grep --untracked -pn xxx TEST2.c
	TEST2.c:1:void func(xxx)
	TEST2.c:3:      use(xxx)

the 2nd match is use(xxx) and it is not a function name itself, in this
case git-grep should "Show the preceding line that contains the function
name of the match.

But it doesn't. To me, this behaviour looks as

		Show the preceding line that contains the function name of
		the match, unless the _PREVIOUS_ matching line is a function
		name itself.

Now, with my patch we have

	$ ./git grep --untracked -pn xxx TEST2.c
	TEST2.c:1:void func(xxx)
	TEST2.c=1=void func(xxx)
	TEST2.c:3:      use(xxx);

and unless I am totatlly confused this does match the documentation.

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-12 13:04       ` Oleg Nesterov
@ 2023-09-12 13:51         ` Oleg Nesterov
  2023-09-12 18:07           ` René Scharfe
  0 siblings, 1 reply; 15+ messages in thread
From: Oleg Nesterov @ 2023-09-12 13:51 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Ævar Arnfjörð Bjarmason, Calvin Wan,
	Carlo Marcelo Arenas Belón, Elijah Newren, Jeff King,
	Linus Torvalds, Mathias Krause, René Scharfe, Taylor Blau,
	git, Alexey Gladkov

On 09/12, Oleg Nesterov wrote:
>
> René, Junio,
>
> I don't like the fact we can't understand each other ;) Could you
> please explain why do you think this patch should update the docs?
>
> Please forget about my patch for the moment. Lets start from the very
> beginning:
>
> 	-p::
> 	--show-function::
> 		Show the preceding line that contains the function name of
> 		the match, unless the matching line is a function name itself.
>
> and in my opinion, it is the current behaviour that doesn't match the
> documentation.
>
> -------------------------------------------------------------------------
>
> 	$ cat TEST1.c
> 	void func1()
> 	{
> 	}
> 	void func2()
> 	{
> 	}
>
> 	$ git grep --untracked -pn func2 TEST1.c
> 	TEST1.c=1=void func1()
> 	TEST1.c:4:void func2()
>
> in this case the matching line is "void func2()" and it is also a function
> name itself, in this case git-grep should not show "=void func1()" which is
> "the preceding line that contains the function name of the match.
>
> But it does. So perhaps git-grep needs another change, something like
>
> 	if (match_funcname(opt, gs, bol, end_of_line(...)))
> 		return;
>
> at the start of show_funcname_line(), but my patch does not change this
> behaviour.
>
> --------------------------------------------------------------------------
>
> 	$ cat TEST2.c
> 	void func(xxx)
> 	{
> 		use(xxx);
> 	}
>
> 	$ git grep --untracked -pn xxx TEST2.c
> 	TEST2.c:1:void func(xxx)
> 	TEST2.c:3:      use(xxx)
>
> the 2nd match is use(xxx) and it is not a function name itself, in this
> case git-grep should "Show the preceding line that contains the function
> name of the match.
>
> But it doesn't. To me, this behaviour looks as
>
> 		Show the preceding line that contains the function name of
> 		the match, unless the _PREVIOUS_ matching line is a function
> 		name itself.
>
> Now, with my patch we have
>
> 	$ ./git grep --untracked -pn xxx TEST2.c
> 	TEST2.c:1:void func(xxx)
> 	TEST2.c=1=void func(xxx)
> 	TEST2.c:3:      use(xxx);
>
> and unless I am totatlly confused this does match the documentation.

So, just in case, please see V2 below. In my opinion it _fixes_ the
current behaviour. With this patch

	$ ./git grep --untracked -pn func2 TEST1.c
	TEST1.c:4:void func2()

	$ ./git grep --untracked -pn xxx TEST2.c
	TEST2.c:1:void func(xxx)
	TEST2.c=1=void func(xxx)
	TEST2.c:3:      use(xxx);

Or I am totally confused?

Oleg.
---

diff --git a/grep.c b/grep.c
index 0904d55b24..c240c4bfa1 100644
--- a/grep.c
+++ b/grep.c
@@ -1347,15 +1347,19 @@ static int match_funcname(struct grep_opt *opt, struct grep_source *gs,
 static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
 			       const char *bol, unsigned lno)
 {
+	unsigned long left = bol - gs->buf;
+
+	if (match_funcname(opt, gs, bol, end_of_line(bol, &left)))
+		return;
+
 	while (bol > gs->buf) {
 		const char *eol = --bol;
 
+		if (--lno < opt->last_shown)
+			break;
+
 		while (bol > gs->buf && bol[-1] != '\n')
 			bol--;
-		lno--;
-
-		if (lno <= opt->last_shown)
-			break;
 
 		if (match_funcname(opt, gs, bol, eol)) {
 			show_line(opt, bol, eol, gs->name, lno, 0, '=');


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-12 13:51         ` Oleg Nesterov
@ 2023-09-12 18:07           ` René Scharfe
  2023-09-13  0:31             ` Junio C Hamano
  2023-09-13 10:15             ` Oleg Nesterov
  0 siblings, 2 replies; 15+ messages in thread
From: René Scharfe @ 2023-09-12 18:07 UTC (permalink / raw)
  To: Oleg Nesterov, Junio C Hamano
  Cc: Ævar Arnfjörð Bjarmason, Calvin Wan,
	Carlo Marcelo Arenas Belón, Elijah Newren, Jeff King,
	Linus Torvalds, Mathias Krause, Taylor Blau, git, Alexey Gladkov

Am 12.09.23 um 15:51 schrieb Oleg Nesterov:
> On 09/12, Oleg Nesterov wrote:
>>
>> René, Junio,
>>
>> I don't like the fact we can't understand each other ;) Could you
>> please explain why do you think this patch should update the docs?

Good thinking!  And thank you for the examples and you patience!

>> Please forget about my patch for the moment. Lets start from the very
>> beginning:
>>
>> 	-p::
>> 	--show-function::
>> 		Show the preceding line that contains the function name of
>> 		the match, unless the matching line is a function name itself.
>>
>> and in my opinion, it is the current behaviour that doesn't match the
>> documentation.>>
>> -------------------------------------------------------------------------
>>
>> 	$ cat TEST1.c
>> 	void func1()
>> 	{
>> 	}
>> 	void func2()
>> 	{
>> 	}
>>
>> 	$ git grep --untracked -pn func2 TEST1.c
>> 	TEST1.c=1=void func1()
>> 	TEST1.c:4:void func2()
>>
>> in this case the matching line is "void func2()" and it is also a function
>> name itself, in this case git-grep should not show "=void func1()" which is
>> "the preceding line that contains the function name of the match.

Makes sense.

>> But it does.

Drat! ;)

The option -p came from diff(1).  I thought diff change lines equivalent
to grep match lines and hunk headers to the new = lines.  What does diff
do with the example file?

   $ diff -U0 -p TEST1.c <(sed s/2/3/ TEST1.c)
   --- TEST1.c	2023-09-12 17:59:04
   +++ /dev/fd/11	2023-09-12 18:54:03
   @@ -4 +4 @@ void func1()
   -void func2()
   +void func3()

It shows func1 in the hunk header, i.e. the next function line before
the changed line.  So that is at least consistent between git grep and
the original -p.

The analogy breaks when it comes to how often a function line is shown:
hunk headers for multiple changes in the same function show the same
function line, but git grep doesn't repeat function lines.  I did that
on purpose, possibly because doesn't have an equivalent of hunk headers
and only allows selecting and showing a subset of the lines of a file.

Except it actually does have the -- lines for separating match contexts,
which are kind of similar.  That looks a bit weird currently:

   $ git grep --untracked -C1 -np func2 TEST1.c
   TEST1.c=1=void func1()
   --
   TEST1.c-3-}
   TEST1.c:4:void func2()
   TEST1.c-5-{

The function line is separated from the match plus context, but you
could argue that they belong together.

>> So perhaps git-grep needs another change, something like
>>
>> 	if (match_funcname(opt, gs, bol, end_of_line(...)))
>> 		return;
>>
>> at the start of show_funcname_line(), but my patch does not change this
>> behaviour.

Yes, to make it match the documentation it would need something like
that.  (Though I'd add a match_funcname() call before the
show_funcname_line() call in grep_source_1() instead, as it already has
the eol value.)

>>
>> --------------------------------------------------------------------------
>>
>> 	$ cat TEST2.c
>> 	void func(xxx)
>> 	{
>> 		use(xxx);
>> 	}
>>
>> 	$ git grep --untracked -pn xxx TEST2.c
>> 	TEST2.c:1:void func(xxx)
>> 	TEST2.c:3:      use(xxx)
>>
>> the 2nd match is use(xxx) and it is not a function name itself, in this
>> case git-grep should "Show the preceding line that contains the function
>> name of the match.
>>
>> But it doesn't.

The corresponding function line (line 1) is shown, as a match (with a
colon).  No function line is shown for the first match because it
doesn't have any, being the first line of the file.  So this matches the
documentation at least.

>> To me, this behaviour looks as
>>
>> 		Show the preceding line that contains the function name of
>> 		the match, unless the _PREVIOUS_ matching line is a function
>> 		name itself.

To me it looks like:

		Show the preceding line that contains the function name of
		the match.

("Show" meaning "show once", not "show for each match again and again".)

Or:

		Show the preceding line that contains the function name of
		the match, unless it is already shown for a different
		reason, e.g. as a match or as the function line of a
		previous match.

>> Now, with my patch we have
>>
>> 	$ ./git grep --untracked -pn xxx TEST2.c
>> 	TEST2.c:1:void func(xxx)
>> 	TEST2.c=1=void func(xxx)
>> 	TEST2.c:3:      use(xxx);
>>
>> and unless I am totatlly confused this does match the documentation.
>
> So, just in case, please see V2 below. In my opinion it _fixes_ the
> current behaviour. With this patch
>
> 	$ ./git grep --untracked -pn func2 TEST1.c
> 	TEST1.c:4:void func2()

Indeed that matches the letter of the documentation.

> 	$ ./git grep --untracked -pn xxx TEST2.c
> 	TEST2.c:1:void func(xxx)
> 	TEST2.c=1=void func(xxx)
> 	TEST2.c:3:      use(xxx);

That one as well.

> Or I am totally confused?

No, I think the documentation is wrong.  I'd simply remove the part
after the comma, but there are probably better options.

René

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-12 18:07           ` René Scharfe
@ 2023-09-13  0:31             ` Junio C Hamano
  2023-09-13  9:46               ` Oleg Nesterov
  2023-09-14 19:34               ` René Scharfe
  2023-09-13 10:15             ` Oleg Nesterov
  1 sibling, 2 replies; 15+ messages in thread
From: Junio C Hamano @ 2023-09-13  0:31 UTC (permalink / raw)
  To: René Scharfe
  Cc: Oleg Nesterov, Ævar Arnfjörð Bjarmason,
	Calvin Wan, Carlo Marcelo Arenas Belón, Elijah Newren,
	Jeff King, Linus Torvalds, Mathias Krause, Taylor Blau, git,
	Alexey Gladkov

René Scharfe <l.s.r@web.de> writes:

>>> To me, this behaviour looks as
>>>
>>> 		Show the preceding line that contains the function name of
>>> 		the match, unless the _PREVIOUS_ matching line is a function
>>> 		name itself.
>
> To me it looks like:
>
> 		Show the preceding line that contains the function name of
> 		the match.
>
> ("Show" meaning "show once", not "show for each match again and again".)
>
> Or:
>
> 		Show the preceding line that contains the function name of
> 		the match, unless it is already shown for a different
> 		reason, e.g. as a match or as the function line of a
> 		previous match.

Wow, that was a mouthful, but matches my understanding.  I naïvely
thought "when showing a hit, we may add the line that matches the
function header pattern before the hit even that header line does
not hit the grep pattern. But if the header line does hit the grep
pattern, we do not bother show the same thing twice." was a
reasonable goal to have.

> Indeed that matches the letter of the documentation.
>
>> 	$ ./git grep --untracked -pn xxx TEST2.c
>> 	TEST2.c:1:void func(xxx)
>> 	TEST2.c=1=void func(xxx)
>> 	TEST2.c:3:      use(xxx);
>
> That one as well.
>
>> Or I am totally confused?
>
> No, I think the documentation is wrong.  I'd simply remove the part
> after the comma, but there are probably better options.

Documentation may not match the behaviour, but do we know what the
behaviour we want is?  To me, the last example that shows the same
line twice (one as a real hit, the other because of "-p") looks a
bit counter-intuitive for the purpose of "help me locate where the
grep hits are".  I dunno.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-13  0:31             ` Junio C Hamano
@ 2023-09-13  9:46               ` Oleg Nesterov
  2023-09-14 19:34                 ` René Scharfe
  2023-09-14 19:34               ` René Scharfe
  1 sibling, 1 reply; 15+ messages in thread
From: Oleg Nesterov @ 2023-09-13  9:46 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: René Scharfe, git, Alexey Gladkov

I think I should trim CC to not spam the people who are not
interested in this discussion...

On 09/12, Junio C Hamano wrote:
>
> Documentation may not match the behaviour, but do we know what the
> behaviour we want is?  To me, the last example that shows the same
> line twice (one as a real hit, the other because of "-p") looks a
> bit counter-intuitive for the purpose of "help me locate where the
> grep hits are".  I dunno.

I have another opinion. To me the 2nd "=..." marker does help to
understand the hit location. But this doesn't matter.

Let me repeat: scripts.

I tried to explain this in 0/1 and in my other replies, but lets
start from the very beginning once again.

I've never used git-grep with -p/-n and most probably never will.
But 3 days ago my text editor (vi clone) started to use "grep -pn".

	$ cat -n TEST.c

	     1	void func1(struct pid *);
	     2	
	     3	void func2(struct pid *pid)
	     4	{
	     5		use1(pid);
	     6	}
	     7	
	     8	void func3(struct pid *pid)
	     9	{
	    10		use2(pid);
	    11	}


when I do

	:git-grep --untracked -pn pid TEST.c

in my editor it calls the script which parses the output from git-grep
and puts this

	<pre>
	<a href="TEST.c?1">TEST.c                  </a>    1 <b>                        </b> void func1(struct <i>pid</i> *);
	<a href="TEST.c?3">TEST.c                  </a>    3 <b>                        </b> void func2(struct <i>pid</i> *<i>pid</i>)
	<a href="TEST.c?5">TEST.c                  </a>    5 <b>func2                   </b> use1(<i>pid</i>);
	<a href="TEST.c?8">TEST.c                  </a>    8 <b>                        </b> void func3(struct <i>pid</i> *<i>pid</i>)
	<a href="TEST.c?10">TEST.c                  </a>   10 <b>func3                   </b> use2(<i>pid</i>);
	</pre>

html to the text buffer which is nicely displayed as

	TEST.c                      1                          void func1(struct pid *);
	TEST.c                      3                          void func2(struct pid *pid)
	TEST.c                      5 func2                    use1(pid);
	TEST.c                      8                          void func3(struct pid *pid)
	TEST.c                     10 func3                    use2(pid);

and I can use Ctrl-] to jump to the file/function/location.

And this script is very simple, it parses the output line-by-line. When
it sees the "=" marker it does some minimal post-processing, records the
function name to display it in the 3rd column later, and goes to the next
line.

But without my patch, in this case I get

	TEST.c                      1                          void func1(struct pid *);
	TEST.c                      3                          void func2(struct pid *pid)
	TEST.c                      5                          use1(pid);
	TEST.c                      8                          void func3(struct pid *pid)
	TEST.c                     10                          use2(pid);

because the output from git-grep

	$ git grep --untracked -pn pid TEST.c
	TEST.c:1:void func1(struct pid *);
	TEST.c:3:void func2(struct pid *pid)
	TEST.c:5:       use1(pid);
	TEST.c:8:void func3(struct pid *pid)
	TEST.c:10:      use2(pid);

doesn't have the "=..." markers at all.

But TEST.c is just the trivial/artificial example. From 0/1,

When I do

	:git-grep -pw pid kernel/sys.c

in my editor without this patch, I get

	kernel/sys.c              224 sys_setpriority          struct pid *pgrp;
	kernel/sys.c              294 sys_getpriority          struct pid *pgrp;
	kernel/sys.c              952                          * Note, despite the name, this returns the tgid not the pid.  The tgid and
	kernel/sys.c              953                          * the pid are identical unless CLONE_THREAD was specified on clone() in
	kernel/sys.c              963                          /* Thread ID - the internal kernel "pid" */
	kernel/sys.c              977 sys_getppid              int pid;
	kernel/sys.c              980 sys_getppid              pid = task_tgid_vnr(rcu_dereference(current->real_parent));
	kernel/sys.c              983 sys_getppid              return pid;
	kernel/sys.c             1073                          SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
	kernel/sys.c             1077 sys_times                struct pid *pgrp;
	kernel/sys.c             1080 sys_times                if (!pid)
	kernel/sys.c             1081 sys_times                pid = task_pid_vnr(group_leader);
	kernel/sys.c             1083 sys_times                pgid = pid;
	kernel/sys.c             1094 sys_times                p = find_task_by_vpid(pid);
	kernel/sys.c             1120 sys_times                if (pgid != pid) {
	kernel/sys.c             1144                          static int do_getpgid(pid_t pid)
	kernel/sys.c             1147 sys_times                struct pid *grp;
	kernel/sys.c             1151 sys_times                if (!pid)
	kernel/sys.c             1155 sys_times                p = find_task_by_vpid(pid);
	kernel/sys.c             1172                          SYSCALL_DEFINE1(getpgid, pid_t, pid)
	kernel/sys.c             1174 sys_times                return do_getpgid(pid);
	kernel/sys.c             1186                          SYSCALL_DEFINE1(getsid, pid_t, pid)
	kernel/sys.c             1189 sys_getpgrp              struct pid *sid;
	kernel/sys.c             1193 sys_getpgrp              if (!pid)
	kernel/sys.c             1197 sys_getpgrp              p = find_task_by_vpid(pid);
	kernel/sys.c             1214                          static void set_special_pids(struct pid *pid)
	kernel/sys.c             1218 sys_getpgrp              if (task_session(curr) != pid)
	kernel/sys.c             1219 sys_getpgrp              change_pid(curr, PIDTYPE_SID, pid);
	kernel/sys.c             1221 sys_getpgrp              if (task_pgrp(curr) != pid)
	kernel/sys.c             1222 sys_getpgrp              change_pid(curr, PIDTYPE_PGID, pid);
	kernel/sys.c             1228 ksys_setsid              struct pid *sid = task_pid(group_leader);
	kernel/sys.c             1684                          SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
	kernel/sys.c             1705 check_prlimit_permission tsk = pid ? find_task_by_vpid(pid) : current;

And only the first 5 funcnames are correct.

And note that this case is very simple too (I mostly use :git-grep to scan
the whole linux kernel tree), but even in this simple case I don't think it
makes sense to use "git-grep -pn" directly, the output is hardly readable
(at least to me) with or without my patch.

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-12 18:07           ` René Scharfe
  2023-09-13  0:31             ` Junio C Hamano
@ 2023-09-13 10:15             ` Oleg Nesterov
  1 sibling, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2023-09-13 10:15 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, git, Alexey Gladkov

On 09/12, René Scharfe wrote:
>
> >> So perhaps git-grep needs another change, something like
> >>
> >> 	if (match_funcname(opt, gs, bol, end_of_line(...)))
> >> 		return;
> >>
> >> at the start of show_funcname_line(), but my patch does not change this
> >> behaviour.
>
> Yes, to make it match the documentation it would need something like
> that.  (Though I'd add a match_funcname() call before the
> show_funcname_line() call in grep_source_1() instead, as it already has
> the eol value.)

Yes, I too thought about this. Except I thought that it makes sense to
pass the additional "unsigned eol" argument to show_funcname_line().
But in any case show_pre_context() will need to calculate eol.

However this is just a minor detail, I am fine either way.

> > So, just in case, please see V2 below. In my opinion it _fixes_ the
> > current behaviour. With this patch
> >
> > 	$ ./git grep --untracked -pn func2 TEST1.c
> > 	TEST1.c:4:void func2()
>
> Indeed that matches the letter of the documentation.
>
> > 	$ ./git grep --untracked -pn xxx TEST2.c
> > 	TEST2.c:1:void func(xxx)
> > 	TEST2.c=1=void func(xxx)
> > 	TEST2.c:3:      use(xxx);
>
> That one as well.

So. Can I assume you agree with my patch ? ;)

> No, I think the documentation is wrong.

Well, to me it looks good, but only after this patch.

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-13  0:31             ` Junio C Hamano
  2023-09-13  9:46               ` Oleg Nesterov
@ 2023-09-14 19:34               ` René Scharfe
  1 sibling, 0 replies; 15+ messages in thread
From: René Scharfe @ 2023-09-14 19:34 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Oleg Nesterov, Ævar Arnfjörð Bjarmason,
	Calvin Wan, Carlo Marcelo Arenas Belón, Elijah Newren,
	Jeff King, Linus Torvalds, Mathias Krause, Taylor Blau, git,
	Alexey Gladkov



Am 13.09.23 um 02:31 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
>
>>>> To me, this behaviour looks as
>>>>
>>>> 		Show the preceding line that contains the function name of
>>>> 		the match, unless the _PREVIOUS_ matching line is a function
>>>> 		name itself.
>>
>> To me it looks like:
>>
>> 		Show the preceding line that contains the function name of
>> 		the match.
>>
>> ("Show" meaning "show once", not "show for each match again and again".)
>>
>> Or:
>>
>> 		Show the preceding line that contains the function name of
>> 		the match, unless it is already shown for a different
>> 		reason, e.g. as a match or as the function line of a
>> 		previous match.
>
> Wow, that was a mouthful, but matches my understanding.  I naïvely
> thought "when showing a hit, we may add the line that matches the
> function header pattern before the hit even that header line does
> not hit the grep pattern. But if the header line does hit the grep
> pattern, we do not bother show the same thing twice." was a
> reasonable goal to have.

I agree, and that's probably why I included the "unless the matching
line is a function name itself" part.  Not sure why the code doesn't
agree.  A test for that aspect would have been nice. *ahem*

René

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-13  9:46               ` Oleg Nesterov
@ 2023-09-14 19:34                 ` René Scharfe
  2023-09-17 16:44                   ` Oleg Nesterov
  0 siblings, 1 reply; 15+ messages in thread
From: René Scharfe @ 2023-09-14 19:34 UTC (permalink / raw)
  To: Oleg Nesterov, Junio C Hamano; +Cc: git, Alexey Gladkov

Am 13.09.23 um 11:46 schrieb Oleg Nesterov:
> I think I should trim CC to not spam the people who are not
> interested in this discussion...
>
> On 09/12, Junio C Hamano wrote:
>>
>> Documentation may not match the behaviour, but do we know what the
>> behaviour we want is?  To me, the last example that shows the same
>> line twice (one as a real hit, the other because of "-p") looks a
>> bit counter-intuitive for the purpose of "help me locate where the
>> grep hits are".  I dunno.
>
> I have another opinion. To me the 2nd "=..." marker does help to
> understand the hit location. But this doesn't matter.

You see it as another layer of information, as an annotation, an
additional line containing meta-information.  I saw them as context
lines, i.e. lines from the original file shown in the original order
without duplication, like - lines, with the only place for meta-
information being the marker character itself.

> Let me repeat: scripts.
>
> I tried to explain this in 0/1 and in my other replies, but lets
> start from the very beginning once again.
>
> I've never used git-grep with -p/-n and most probably never will.
> But 3 days ago my text editor (vi clone) started to use "grep -pn".
>
> 	$ cat -n TEST.c
>
> 	     1	void func1(struct pid *);
> 	     2
> 	     3	void func2(struct pid *pid)
> 	     4	{
> 	     5		use1(pid);
> 	     6	}
> 	     7
> 	     8	void func3(struct pid *pid)
> 	     9	{
> 	    10		use2(pid);
> 	    11	}
>
>
> when I do
>
> 	:git-grep --untracked -pn pid TEST.c
>
> in my editor it calls the script which parses the output from git-grep
> and puts this
>
> 	<pre>
> 	<a href="TEST.c?1">TEST.c                  </a>    1 <b>                        </b> void func1(struct <i>pid</i> *);
> 	<a href="TEST.c?3">TEST.c                  </a>    3 <b>                        </b> void func2(struct <i>pid</i> *<i>pid</i>)
> 	<a href="TEST.c?5">TEST.c                  </a>    5 <b>func2                   </b> use1(<i>pid</i>);
> 	<a href="TEST.c?8">TEST.c                  </a>    8 <b>                        </b> void func3(struct <i>pid</i> *<i>pid</i>)
> 	<a href="TEST.c?10">TEST.c                  </a>   10 <b>func3                   </b> use2(<i>pid</i>);
> 	</pre>
>
> html to the text buffer which is nicely displayed as
>
> 	TEST.c                      1                          void func1(struct pid *);
> 	TEST.c                      3                          void func2(struct pid *pid)
> 	TEST.c                      5 func2                    use1(pid);
> 	TEST.c                      8                          void func3(struct pid *pid)
> 	TEST.c                     10 func3                    use2(pid);
>
> and I can use Ctrl-] to jump to the file/function/location.
>
> And this script is very simple, it parses the output line-by-line. When
> it sees the "=" marker it does some minimal post-processing, records the
> function name to display it in the 3rd column later, and goes to the next
> line.
>
> But without my patch, in this case I get
>
> 	TEST.c                      1                          void func1(struct pid *);
> 	TEST.c                      3                          void func2(struct pid *pid)
> 	TEST.c                      5                          use1(pid);
> 	TEST.c                      8                          void func3(struct pid *pid)
> 	TEST.c                     10                          use2(pid);
>
> because the output from git-grep
>
> 	$ git grep --untracked -pn pid TEST.c
> 	TEST.c:1:void func1(struct pid *);
> 	TEST.c:3:void func2(struct pid *pid)
> 	TEST.c:5:       use1(pid);
> 	TEST.c:8:void func3(struct pid *pid)
> 	TEST.c:10:      use2(pid);
>
> doesn't have the "=..." markers at all.

Sure, that's a problem.  You could easily check whether a match is also
a function line according to the default heuristic (does the line start
with a letter, dollar sign or underscore?), e.g. with a glob like
"*:*:[a-zA-Z$_]*".  If git grep uses more sophisticated rules then it
becomes impractical -- there are some impressive regexes in userdiff.c
and the script would have to figure out which language the file is
configured to be for Git in the first place.

> But TEST.c is just the trivial/artificial example. From 0/1,
>
> When I do
>
> 	:git-grep -pw pid kernel/sys.c
>
> in my editor without this patch, I get
>
> 	kernel/sys.c              224 sys_setpriority          struct pid *pgrp;
> 	kernel/sys.c              294 sys_getpriority          struct pid *pgrp;
> 	kernel/sys.c              952                          * Note, despite the name, this returns the tgid not the pid.  The tgid and
> 	kernel/sys.c              953                          * the pid are identical unless CLONE_THREAD was specified on clone() in
> 	kernel/sys.c              963                          /* Thread ID - the internal kernel "pid" */
> 	kernel/sys.c              977 sys_getppid              int pid;
> 	kernel/sys.c              980 sys_getppid              pid = task_tgid_vnr(rcu_dereference(current->real_parent));
> 	kernel/sys.c              983 sys_getppid              return pid;
> 	kernel/sys.c             1073                          SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
> 	kernel/sys.c             1077 sys_times                struct pid *pgrp;
> 	kernel/sys.c             1080 sys_times                if (!pid)
> 	kernel/sys.c             1081 sys_times                pid = task_pid_vnr(group_leader);
> 	kernel/sys.c             1083 sys_times                pgid = pid;
> 	kernel/sys.c             1094 sys_times                p = find_task_by_vpid(pid);
> 	kernel/sys.c             1120 sys_times                if (pgid != pid) {
> 	kernel/sys.c             1144                          static int do_getpgid(pid_t pid)
> 	kernel/sys.c             1147 sys_times                struct pid *grp;
> 	kernel/sys.c             1151 sys_times                if (!pid)
> 	kernel/sys.c             1155 sys_times                p = find_task_by_vpid(pid);
> 	kernel/sys.c             1172                          SYSCALL_DEFINE1(getpgid, pid_t, pid)
> 	kernel/sys.c             1174 sys_times                return do_getpgid(pid);
> 	kernel/sys.c             1186                          SYSCALL_DEFINE1(getsid, pid_t, pid)
> 	kernel/sys.c             1189 sys_getpgrp              struct pid *sid;
> 	kernel/sys.c             1193 sys_getpgrp              if (!pid)
> 	kernel/sys.c             1197 sys_getpgrp              p = find_task_by_vpid(pid);
> 	kernel/sys.c             1214                          static void set_special_pids(struct pid *pid)
> 	kernel/sys.c             1218 sys_getpgrp              if (task_session(curr) != pid)
> 	kernel/sys.c             1219 sys_getpgrp              change_pid(curr, PIDTYPE_SID, pid);
> 	kernel/sys.c             1221 sys_getpgrp              if (task_pgrp(curr) != pid)
> 	kernel/sys.c             1222 sys_getpgrp              change_pid(curr, PIDTYPE_PGID, pid);
> 	kernel/sys.c             1228 ksys_setsid              struct pid *sid = task_pid(group_leader);
> 	kernel/sys.c             1684                          SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
> 	kernel/sys.c             1705 check_prlimit_permission tsk = pid ? find_task_by_vpid(pid) : current;
>
> And only the first 5 funcnames are correct.

Well, your script turns "SYSCALL_DEFINE3(setpriority, [...]" into
"sys_setpriority" etc., so it is already knows a lot about function lines.
It could be made to take a second look at match lines, I guess.

But a general solution would require git grep to somehow report both aspects
of matching function lines.  That's easy if we allow ourselves to duplicate
lines.  This is strange enough to warrant making it a new output format I
think.

Another possibility is to switch the precedence of : and =.  With match
coloring it would still be possible to identify most positive matches in
function interactively, but not negative matches (-v) in function lines.
Probably not the best choice, since grep is primarily about finding
matching lines; function line info comes second.

Can we use two markers, i.e. both : and =?  No idea what that might break.

There is a Unicode symbol named colon equals, which looks like this: ≔
We added the =, so I guess we could add that thing as well.  But is the
world prepared for Unicode output?  Not sure.  If we need to stay in the
ASCII table the same idea could be implemented with a different character
like # or ;.

> And note that this case is very simple too (I mostly use :git-grep to scan
> the whole linux kernel tree), but even in this simple case I don't think it
> makes sense to use "git-grep -pn" directly, the output is hardly readable
> (at least to me) with or without my patch.

So with the patch below this would look like this:

kernel/sys.c=218=SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
kernel/sys.c:224:       struct pid *pgrp;
kernel/sys.c=288=SYSCALL_DEFINE2(getpriority, int, which, int, who)
kernel/sys.c:294:       struct pid *pgrp;
kernel/sys.c=943=SYSCALL_DEFINE1(setfsgid, gid_t, gid)
kernel/sys.c:952: * Note, despite the name, this returns the tgid not the pid.  The tgid and
kernel/sys.c:953: * the pid are identical unless CLONE_THREAD was specified on clone() in
kernel/sys.c=958=SYSCALL_DEFINE0(getpid)
kernel/sys.c:963:/* Thread ID - the internal kernel "pid" */
kernel/sys.c=975=SYSCALL_DEFINE0(getppid)
kernel/sys.c:977:       int pid;
kernel/sys.c:980:       pid = task_tgid_vnr(rcu_dereference(current->real_parent));
kernel/sys.c:983:       return pid;
kernel/sys.c#1073#SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
kernel/sys.c:1077:      struct pid *pgrp;
kernel/sys.c:1080:      if (!pid)
kernel/sys.c:1081:              pid = task_pid_vnr(group_leader);
kernel/sys.c:1083:              pgid = pid;
kernel/sys.c:1094:      p = find_task_by_vpid(pid);
kernel/sys.c:1120:      if (pgid != pid) {
kernel/sys.c#1144#static int do_getpgid(pid_t pid)
kernel/sys.c:1147:      struct pid *grp;
kernel/sys.c:1151:      if (!pid)
kernel/sys.c:1155:              p = find_task_by_vpid(pid);
kernel/sys.c#1172#SYSCALL_DEFINE1(getpgid, pid_t, pid)
kernel/sys.c:1174:      return do_getpgid(pid);
kernel/sys.c#1186#SYSCALL_DEFINE1(getsid, pid_t, pid)
kernel/sys.c:1189:      struct pid *sid;
kernel/sys.c:1193:      if (!pid)
kernel/sys.c:1197:              p = find_task_by_vpid(pid);
kernel/sys.c#1214#static void set_special_pids(struct pid *pid)
kernel/sys.c:1218:      if (task_session(curr) != pid)
kernel/sys.c:1219:              change_pid(curr, PIDTYPE_SID, pid);
kernel/sys.c:1221:      if (task_pgrp(curr) != pid)
kernel/sys.c:1222:              change_pid(curr, PIDTYPE_PGID, pid);
kernel/sys.c=1225=int ksys_setsid(void)
kernel/sys.c:1228:      struct pid *sid = task_pid(group_leader);
kernel/sys.c#1684#SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
kernel/sys.c:1705:      tsk = pid ? find_task_by_vpid(pid) : current;

It uses # for matches that happen to be function lines, and doesn't show
a previous function line for those anymore.  Usable?

René


diff --git a/grep.c b/grep.c
index fc2d0c837a..a08da5cdcb 100644
--- a/grep.c
+++ b/grep.c
@@ -1681,6 +1681,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 			goto next_line;
 		}
 		if (hit && (opt->max_count < 0 || count < opt->max_count)) {
+			char sign = ':';
 			count++;
 			if (opt->status_only)
 				return 1;
@@ -1697,12 +1698,14 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 				opt->output(opt, " matches\n", 9);
 				return 1;
 			}
+			if (opt->funcname && match_funcname(opt, gs, bol, eol))
+				sign = '#';
 			/* Hit at this line.  If we haven't shown the
 			 * pre-context lines, we would need to show them.
 			 */
 			if (opt->pre_context || opt->funcbody)
 				show_pre_context(opt, gs, bol, eol, lno);
-			else if (opt->funcname)
+			else if (opt->funcname && sign == ':')
 				show_funcname_line(opt, gs, bol, lno);
 			cno = opt->invert ? icol : col;
 			if (cno < 0) {
@@ -1715,7 +1718,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 				 */
 				cno = 0;
 			}
-			show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
+			show_line(opt, bol, eol, gs->name, lno, cno + 1, sign);
 			last_hit = lno;
 			if (opt->funcbody)
 				show_function = 1;

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] git-grep: improve the --show-function behaviour
  2023-09-14 19:34                 ` René Scharfe
@ 2023-09-17 16:44                   ` Oleg Nesterov
  0 siblings, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2023-09-17 16:44 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, git, Alexey Gladkov

René, sorry for late reply,

On 09/14, René Scharfe wrote:
>
> Am 13.09.23 um 11:46 schrieb Oleg Nesterov:
> >
> > I have another opinion. To me the 2nd "=..." marker does help to
> > understand the hit location. But this doesn't matter.
>
> You see it as another layer of information, as an annotation, an
> additional line containing meta-information.  I saw them as context
> lines, i.e. lines from the original file shown in the original order
> without duplication, like - lines, with the only place for meta-
> information being the marker character itself.

Yes,

> > But without my patch, in this case I get
> >
> > 	TEST.c                      1                          void func1(struct pid *);
> > 	TEST.c                      3                          void func2(struct pid *pid)
> > 	TEST.c                      5                          use1(pid);
> > 	TEST.c                      8                          void func3(struct pid *pid)
> > 	TEST.c                     10                          use2(pid);
> >
> > because the output from git-grep
> >
> > 	$ git grep --untracked -pn pid TEST.c
> > 	TEST.c:1:void func1(struct pid *);
> > 	TEST.c:3:void func2(struct pid *pid)
> > 	TEST.c:5:       use1(pid);
> > 	TEST.c:8:void func3(struct pid *pid)
> > 	TEST.c:10:      use2(pid);
> >
> > doesn't have the "=..." markers at all.
>
> Sure, that's a problem.  You could easily check whether a match is also
> a function line according to the default heuristic

Yes, but...

> there are some impressive regexes in userdiff.c
> and the script would have to figure out which language the file is
> configured to be for Git in the first place.

Yes, and this is what I'd like to avoid, I do not want to duplicate the
builtin_drivers[] logic.

> > in my editor without this patch, I get
> >
> > 	kernel/sys.c              224 sys_setpriority          struct pid *pgrp;

[...snip...]

> Well, your script turns "SYSCALL_DEFINE3(setpriority, [...]" into
> "sys_setpriority" etc., so it is already knows a lot about function lines.

No, not a lot ;)

But yes sure, I can adapt this script to the current behaviour. In fact I
can even change it to not use "-p", the script can read the file backwards
itself (but of course I'd prefer to not do this).

Nevermind. Thanks for discussion. If I can't convince maintainers - lets
forget this patch. Although I will probably keep it (and another one I
had from the very begginning) for myself, it works for me.

> Can we use two markers, i.e. both : and =?  No idea what that might break.

...

> So with the patch below this would look like this:

...

> kernel/sys.c#1073#SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)

This works for me too. So please CC me if you ever push this change ;)

Thanks,

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-09-17 16:46 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-11 12:11 [PATCH 0/1] git-grep: improve the --show-function behaviour Oleg Nesterov
2023-09-11 12:12 ` [PATCH 1/1] " Oleg Nesterov
2023-09-11 20:11   ` René Scharfe
2023-09-11 21:54     ` Oleg Nesterov
2023-09-11 22:34   ` Junio C Hamano
2023-09-11 23:17     ` Oleg Nesterov
2023-09-12 13:04       ` Oleg Nesterov
2023-09-12 13:51         ` Oleg Nesterov
2023-09-12 18:07           ` René Scharfe
2023-09-13  0:31             ` Junio C Hamano
2023-09-13  9:46               ` Oleg Nesterov
2023-09-14 19:34                 ` René Scharfe
2023-09-17 16:44                   ` Oleg Nesterov
2023-09-14 19:34               ` René Scharfe
2023-09-13 10:15             ` Oleg Nesterov

Code repositories for project(s) associated with this public inbox

	https://80x24.org/pub/scm/git/git.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).