dumping ground for random patches and texts
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] WIP
Date: Sat, 11 May 2019 22:55:13 +0000	[thread overview]
Message-ID: <20190511225513.20820-1-e@80x24.org> (raw)

---
 lib/PublicInbox/ViewDiff.pm | 367 +++++++++++++++++++++++++++++++++++-
 lib/PublicInbox/ViewVCS.pm  |  19 +-
 2 files changed, 367 insertions(+), 19 deletions(-)

diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 85b5314..6d732dd 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -10,10 +10,11 @@ package PublicInbox::ViewDiff;
 use strict;
 use warnings;
 use base qw(Exporter);
-our @EXPORT_OK = qw(flush_diff);
+our @EXPORT_OK = qw(flush_diff); # for emails, OO API is for git(1) output
 use URI::Escape qw(uri_escape_utf8);
 use PublicInbox::Hval qw(ascii_html to_attr from_attr);
 use PublicInbox::Git qw(git_unquote);
+use PublicInbox::WwwStream;
 
 # keep track of state so we can avoid redundant HTML tags for
 # identically-classed lines
@@ -41,6 +42,17 @@ package PublicInbox::ViewDiff;
 my $PATH_A = '"?a/.+|/dev/null';
 my $PATH_B = '"?b/.+|/dev/null';
 
+my $CMT_FMT = '--pretty=format:'.join('%n',
+		'%H', '%s', '%an <%ae>', '%ai', '%cn <%ce>', '%ci',
+		'%t', '%p', '%D', '%b%x00');
+
+sub CC_EMPTY () { " This is a merge, and the combined diff is empty.\n" }
+sub CC_MERGE () { " This is a merge, showing combined diff:\n\n" }
+
+# used for "git show" (on commits) and "git diff"
+my @DIFF_OPT = qw(-z --numstat -p --encoding=UTF-8 -C -B -D
+		  --no-color --no-abbrev);
+
 sub to_html ($$) {
 	$_[0]->linkify_1($_[1]);
 	$_[0]->linkify_2(ascii_html($_[1]));
@@ -143,6 +155,16 @@ ($$$$$)
 	undef
 }
 
+sub dquery ($$) {
+	my ($pa, $pb) = @_;
+	my $q = '?b=' . uri_escape_utf8($pb, UNSAFE);
+	$q .= '&amp;a=' . uri_escape_utf8($pa, UNSAFE) if $pa ne $pb;
+	$q
+}
+
+# API for emails. public-inbox assumes we can fit any email entirely
+# in memory (because we rely on Email::MIME anyways).
+# This interface assumes that, too.
 sub flush_diff ($$$) {
 	my ($dst, $ctx, $linkify) = @_;
 	my $diff = $ctx->{-diff};
@@ -174,13 +196,7 @@ ($$$)
 			}
 			$pa = (split('/', git_unquote($pa), 2))[1];
 			$pb = (split('/', git_unquote($pb), 2))[1];
-			$dctx = {
-				Q => "?b=".uri_escape_utf8($pb, UNSAFE),
-			};
-			if ($pa ne $pb) {
-				$dctx->{Q} .= '&amp;a='.
-					uri_escape_utf8($pa, UNSAFE);
-			}
+			$dctx = { Q => dquery($pa, $pb) };
 			anchor1($dst, $ctx, $linkify, $pb, $s) and next;
 			$$dst .= to_html($linkify, $s);
 		} elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
@@ -196,7 +212,7 @@ ($$$)
 			$$dst .= to_html($linkify, $s);
 		} elsif ($s =~ s/^@@ (\S+) (\S+) @@//) {
 			$$dst .= '</span>' if $state2class[$state];
-			$$dst .= qq(<span\nclass="hunk">);
+			$$dst .= qq(<span\nclass="hunk">); # XHTML
 			$$dst .= diff_hunk($dctx, $spfx, $1, $2);
 			$$dst .= '</span>';
 			$state = DSTATE_CTX;
@@ -235,4 +251,337 @@ ($$$)
 	undef;
 }
 
+# OO interface
+sub _cmd ($$) {
+	my ($self, $cmd) = @_;
+	$self->{git_cmd} = join(' ', @$cmd);
+	$cmd;
+}
+
+sub diff_cmd ($$$) {
+	my ($self, $oid_a, $oid_b) = @_;
+	_cmd($self, [ 'diff', @DIFF_OPT, $oid_a, $oid_b, '--' ]);
+}
+
+sub commit_cmd ($$) {
+	my ($self, $oid) = @_;
+	_cmd($self, [ qw(show -c), @DIFF_OPT, $CMT_FMT, $oid, '--' ]);
+}
+
+# OO API for parsing output of git-diff(1), git-show(1), etc...
+# We try to do as much as possible by streaming, so we act as
+# a stream editor (e.g. 'sed')
+sub new {
+	my ($class) = @_;
+	my $self = {
+		dstate => DSTATE_INIT,
+		dbuf => '',
+		# mhelp => merge help
+		# diff_tree => 1 (true if comparing tree-ish)
+	};
+	$self->{ndiff} = $self->{nchg} = $self->{nadd} = $self->{ndel} = 0;
+	bless $self, $class;
+}
+
+# diffstat links to anchors within the same HTML page
+sub git_diffstat_rename ($$$) {
+	my ($self, $from, $to) = @_;
+	my $anchor = to_attr(git_unquote($to));
+	$self->{anchors}->{$anchor} = $to;
+	my @from = split('/', $from);
+	my @to = split('/', $to);
+	my ($base, @base);
+
+	# only show differing path components
+	while (@to && @from && $to[0] eq $from[0]) {
+		push @base, shift(@to);
+		shift @from;
+	}
+
+	$base = ascii_html(join('/', @base)) if @base;
+	$from = ascii_html(join('/', @from));
+	$to = ascii_html(join('/', @to));
+	$to = qq(<a\nhref="#$anchor">$to</a>);
+	@base ? "$base/{$from =&gt; $to}" : "$from =&gt; $to";
+}
+
+sub git_diff_sed_stat ($$) {
+	my ($self, $dst) = @_;
+	my @stat = split(/\0/, delete $self->{dbuf}, -1);
+	my $end; # end-of-stat
+	my $nchg = \($self->{nchg});
+	my $nadd = \($self->{nadd});
+	my $ndel = \($self->{ndel});
+	if (!$self->{dstat_started}) {
+		$self->{dstat_started} = 1;
+
+		# merges start with an extra '\0' before the diffstat
+		# non-merge commits start with an extra '\n', instead
+		if ($self->{mhelp}) {
+			if ($stat[0] eq '') {
+				shift @stat;
+			} else {
+				warn
+'initial merge diffstat line was not empty';
+			}
+		} else {
+			# for commits, only (not diff-tree)
+			$stat[0] =~ s/\A\n//s;
+		}
+	}
+	while (defined(my $l = shift @stat)) {
+		if ($l eq '') {
+			$end = 1 if $stat[0] && $stat[0] =~ /\Ad/; # "diff --"
+			last;
+		} elsif ($l =~ /\Adiff /) {
+			unshift @stat, $l;
+			$end = 1;
+			last;
+		}
+		utf8::upgrade($l);
+		$l =~ /\A(\S+)\t+(\S+)\t+(.*)/ or next;
+		my ($add, $del, $fn) = ($1, $2, $3);
+		if ($fn ne '') { # normal modification
+			# TODO: discard diffs if they are too big
+			# gigantic changes with many files may still OOM us
+			my $anchor = to_attr(git_unquote($fn));
+			$self->{anchors}->{$anchor} = $fn;
+			$l = qq(<a\nhref="#$anchor">).ascii_html($fn).'</a>';
+		} else { # rename
+			# incomplete...
+			if (scalar(@stat) < 2) {
+				unshift @stat, $l;
+				last;
+			}
+			my $from = shift @stat;
+			my $to = shift @stat;
+			utf8::upgrade($from);
+			utf8::upgrade($to);
+			$l = git_diffstat_rename($self, $from, $to);
+		}
+
+		# text changes show numerically, Binary does not
+		if ($add =~ /\A\d+\z/) {
+			$$nadd += $add;
+			$$ndel += $del;
+			$add = "+$add";
+			$del = "-$del";
+		}
+		++$$nchg;
+		my $num = sprintf('% 6s/%-6s', $del, $add);
+		$$dst .= " $num\t$l\n";
+	}
+
+	# the rest of the diff:
+	$self->{dbuf} = join("\0", @stat);
+	return unless $end;
+
+	$self->{dstate} = DSTATE_HEAD;
+	$$dst .= "\n $$nchg ";
+	$$dst .= $$nchg  == 1 ? 'file changed, ' : 'files changed, ';
+	$$dst .= $$nadd;
+	$$dst .= $$nadd == 1 ? ' insertion(+), ' : ' insertions(+), ';
+	$$dst .= $$ndel;
+	$$dst .= $$ndel == 1 ? " deletion(-)\n\n" : " deletions(-)\n\n";
+}
+
+# index abcdef89..01234567 100644
+sub git_diff_ab_index ($$$$) {
+	my ($self, $oid_a, $oid_b, $mode) = @_;
+	$self->{oid_a} = $oid_a;
+	$self->{oid_b} = $oid_b;
+	my $range = "$oid_a..$oid_b";
+	if ($self->{diff_tree}) {
+		$range = qq(<a\nhref="../../$range/s/$self->{Q}">$range</a>);
+	}
+	'index ' . $range . ascii_html($mode);
+}
+
+# diff --git a/foo.c b/bar.c
+sub git_diff_ab_hdr ($$$) {
+	my ($self, $pa, $pb) = @_;
+	my $rv = '';
+	if ($self->{dstate} != DSTATE_HEAD) {
+		to_state(\$rv, $self->{dstate}, DSTATE_HEAD);
+	}
+	$pa = (split('/', git_unquote($pa), 2))[1];
+	$pb = (split('/', git_unquote($pb), 2))[1];
+	$self->{Q} = dquery($pa, $pb) if $self->{diff_tree};
+	my $anchor = to_attr($pb);
+	delete $self->{anchors}->{$anchor};
+
+	# not wasting bandwidth on links here
+	# links in hunk headers are far more useful with line offsets
+	$rv .= qq(<a\nid="$anchor">diff</a> --git ) .
+		ascii_html($pa) . ' ' . ascii_html($pb)
+}
+
+# diff (--cc|--combined)
+sub git_diff_cc_hdr {
+	my ($self, $combined, $path) = @_;
+	$path = git_unquote($path);
+	$self->{Q} = dquery($path, $path);
+	my $anchor = to_attr($path);
+	delete $self->{anchors}->{$anchor};
+	qq(<a\nid="$anchor">diff</a> --$combined ) . ascii_html($path);
+}
+
+sub offset_link ($$$) {
+	my ($qs, $oid, $offset) = @_;
+	my ($n) = ($offset =~ /\A[\-\+](\d+)/);
+	if (defined $n && $n == 0) {
+		# new or deleted file, don't link it
+		$offset;
+	} else {
+		$n = defined $n ? "#n$n" : '';
+		qq(<a href="../../$oid/s/$qs$n">$offset</a>)
+	}
+}
+
+# @@ -1,2 +3,4 @@ (regular diff)
+sub git_diff_ab_hunk ($$$$) {
+	my ($self, $ca, $cb, $func_ctx) = @_;
+	my $qs = $self->{Q};
+
+	qq(<span\nclass=hunk>@@  ) . # HTML
+	offset_link($qs, $self->{oid_a}, $ca) .
+	' ' . offset_link($qs, $self->{oid_b}, $cb) .
+	' @@' . ascii_html($func_ctx) . '</span>';
+}
+
+# index abcdef09,01234567..76543210
+sub git_diff_cc_index {
+	my ($self, $before, $last, $end) = @_;
+	$self->{oids_cc} = [ split(',', $before), $last ];
+
+	# not wasting bandwidth on links here, yet
+	# links in hunk headers are far more useful with line offsets
+	"index $before..$last" . ascii_html($end);
+}
+
+# @@@ -1,2 -3,4 +5,6 @@@ (combined diff)
+sub git_diff_cc_hunk ($$$$) {
+	my ($self, $at_signs, $offs, $func_ctx) = @_;
+	my $pobj = $self->{oids_cc};
+	my $i = 0;
+	my $qs = $self->{Q};
+	qq(<span\nclass=hunk>@@  ) . # HTML
+		join(' ', $at_signs, map {
+				offset_link($qs, $pobj->[$i++], $_);
+			} split(' ', $offs),
+		$at_signs) . ascii_html($func_ctx) . '</span>';
+}
+
+# the rest of the diff (beyond diffstat)
+sub git_diff_sed_lines ($$) {
+	my ($self, $dst) = @_;
+
+	my @dlines = split(/\n/, delete $self->{dbuf}, -1);
+
+	# don't touch the last line, it may not be terminated
+	$self->{dbuf} = pop @dlines;
+
+	if (my $help = delete $self->{mhelp}) {
+		$$dst .= $help; # CC_MERGE
+	}
+
+	# reminder: this is stricter than similar code in flush_diff,
+	# this is for git output (including --cc/--combined) we generate,
+	# while flush_diff parses mail
+	my $ndiff = \($self->{ndiff});
+	my $linkify = PublicInbox::Linkify->new;
+	while (defined(my $s = shift @dlines)) {
+		utf8::upgrade($s);
+		if ($s =~ m{\Adiff --git ("?a/.+) ("?b/.+)\z}) { # regular
+			$$dst .= git_diff_ab_hdr($self, $1, $2);
+		} elsif ($s =~ m{\Adiff --(cc|combined) (.+)\z}) {
+			$$dst .= git_diff_cc_hdr($self, $1, $2);
+		} elsif ($s =~ /\Aindex ($OID_BLOB)\.\.($OID_BLOB)(.*)\z/o) {
+			# regular diff
+			$$dst .= git_diff_ab_index($self, $1, $2, $3);
+		} elsif ($s =~
+			 /\Aindex ($OID_BLOB,[^\.]+)\.\.($OID_BLOB)(.*)\z/o) {
+			# --cc diff
+			$$dst .= git_diff_cc_index($self, $1, $2, $3);
+		} elsif ($s =~ /\A@@ (\S+) (\S+) @@(.*)\z/) { # regular
+			$$dst .= '</span>' if $state2class[$self->{dstate}];
+			$$dst .= git_diff_ab_hunk($self, $1, $2, $3);
+			$self->{dstate} = DSTATE_CTX;
+		} elsif ($s =~ /\A(@@@+) (\S+.*\S+) @@@+(.*)\z/) { # --cc
+			$$dst .= '</span>' if $state2class[$self->{dstate}];
+			$$dst .= git_diff_cc_hunk($self, $1, $2, $3);
+		} elsif ($s =~ /^ /) {
+			# works for common cases, but not weird/long filenames
+			if ($self->{dstate} == DSTATE_STAT &&
+					$s =~ /^ (.+)( +\| .*\z)/s) {
+				anchor0(\$dst, $self, $linkify, $1, $2) and next;
+			} elsif ($state2class[$self->{dstate}]) {
+				to_state($dst, $self->{dstate}, DSTATE_CTX);
+			}
+			$$dst .= to_html($linkify, $s);
+		} elsif ($s =~ m!^--- ! || $s =~ m!^\+{3} !)  {
+			# color only (no oid link) if missing dctx->{oid_*}
+			$self->{dstate} <= DSTATE_STAT and
+				to_state($dst, $state, DSTATE_HEAD);
+			$$dst .= to_html($linkify, $s);
+		} elsif ($s =~ /^\+/) {
+			if ($self->{dstate} != DSTATE_ADD) {
+				to_state($dst, $state, DSTATE_ADD);
+			}
+			$$dst .= to_html($linkify, $s);
+		} elsif ($s =~ /^-/) {
+			if ($$state != DSTATE_DEL && $$state > DSTATE_STAT) {
+				to_state($dst, $state, DSTATE_DEL);
+			}
+			$$dst .= to_html($linkify, $s);
+		# ignore the following lines in headers:
+		} elsif ($s =~ /^(?:dis)similarity index/ ||
+			 $s =~ /^(?:old|new) mode/ ||
+			 $s =~ /^(?:deleted|new) file mode/ ||
+			 $s =~ /^(?:copy|rename) (?:from|to) / ||
+			 $s =~ /^(?:dis)?similarity index /) {
+			$$dst .= to_html($linkify, $s);
+		} else {
+			$$dst .= to_html($linkify, $s);
+		}
+		++$$ndiff;
+	}
+}
+
+sub git_diff_sed_run ($$) {
+	my ($self, $dst) = @_;
+	$self->{dstate} == DSTATE_STAT and git_diff_sed_stat($self, $dst);
+	$self->{dstate} > DSTATE_STAT and git_diff_sed_lines($self, $dst);
+	undef;
+}
+
+sub git_diff_sed_close ($$) {
+	my ($self, $dst) = @_;
+	my $tmp = delete $self->{dbuf};
+	utf8::upgrade($tmp);
+	$$dst .= $tmp;
+	undef;
+}
+
+sub git_diff_sed {
+	my ($self, $ctx) = @_;
+	my $ws = { ctx => $ctx };
+	my @first = PublicInbox::WwwStream::html_top($ws) . '<pre>';
+	$ctx->{-html_tip} = "<pre>Output of: git $self->{git_cmd}\n";
+	$self->{dstate} = DSTATE_STAT;
+
+	# this filters for $fh->write or $body->getline (see Qspawn)
+	sub {
+		my $dst = shift @first || '';
+		if (defined $_[0]) { # $_[0] == scalar buffer
+			$self->{dbuf} .= $_[0];
+			git_diff_sed_run($self, \$dst);
+		} else { # undef means EOF from "git show", flush the last bit
+			git_diff_sed_close($self, \$dst);
+			$dst .= '</pre>'.PublicInbox::WwwStream::html_end($ws);
+		}
+		$dst;
+	}
+}
+
 1;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index c693fcf..2ba09a8 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -20,7 +20,7 @@ package PublicInbox::ViewVCS;
 use PublicInbox::SolverGit;
 use PublicInbox::WwwStream;
 use PublicInbox::Linkify;
-use PublicInbox::ViewDiff qw(flush_diff);
+use PublicInbox::ViewDiff;
 use PublicInbox::Hval qw(ascii_html to_filename);
 my $hl = eval {
 	require PublicInbox::HlMod;
@@ -181,23 +181,22 @@ ($$$$$$)
 		return html_page($ctx, 500, \'seek error');
 	}
 	$log = do { local $/; <$log> };
-	warn "log: $log\n";
+	my $vdiff = PublicInbox::ViewDiff->new;
 	my $git_b = $res_b->[0];
-	my $cmd = ['git', "--git-dir=$git_b->{git_dir}", 'diff',
-			$res_a->[1], $res_b->[1] ];
+	my $gcmd = $vdiff->diff_cmd($res_a->[1], $res_b->[1]);
+	my $cmd = ['git', "--git-dir=$git_b->{git_dir}", @$gcmd ];
 	my $qsp = PublicInbox::Qspawn->new($cmd);
 	my $env = $ctx->{env};
 	$env->{'qspawn.wcb'} = delete $ctx->{-wcb};
-	$qsp->psgi_return($env, undef, sub {
+	$qsp->psgi_return($env, undef, sub { # parse header
 		my ($r, $bref) = @_;
 		if (!defined $r) { # error
 			html_page($ctx, 500, $log);
-		} elsif (index($$bref, "\0") >= 0) {
-			my $ct = 'application/octet-stream';
-			[200, ['Content-Type', $ct ] ];
+		} elsif ($r == 0) {
+			PublicInbox::WwwStream::r($ctx, 200, 'empty diff');
 		} else {
-			my $ct = 'text/plain; charset=UTF-8';
-			[200, ['Content-Type', $ct] ];
+			$env->{'qspawn.filter'} = $vdiff->git_diff_sed($ctx);
+			PublicInbox::WwwStream::r($ctx, 200);
 		}
 	});
 }
-- 
EW


             reply	other threads:[~2019-05-11 22:55 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-11 22:55 Eric Wong [this message]
  -- strict thread matches above, loose matches on Subject: below --
2021-10-27 20:16 [PATCH] wip Eric Wong
2021-06-05 19:58 Eric Wong
2021-04-05  7:42 Eric Wong
2021-03-08  7:11 Eric Wong
2021-01-21  4:24 [PATCH] WIP Eric Wong
2021-01-03 22:57 [PATCH] wip Eric Wong
2020-12-27 11:36 [PATCH] WIP Eric Wong
2020-11-15  7:35 [PATCH] wip Eric Wong
2020-04-23  4:27 Eric Wong
2020-04-20  7:14 Eric Wong
2020-01-13  9:24 [PATCH] WIP Eric Wong
2019-01-02  9:21 [PATCH] wip Eric Wong
2018-07-06 21:31 Eric Wong
2018-06-24 11:55 Eric Wong
2018-06-24  8:39 Eric Wong
2017-07-15  1:42 [PATCH] WIP Eric Wong
2017-04-12 20:17 [PATCH] wip Eric Wong
2017-04-05 18:40 Eric Wong
2016-08-23 20:07 Eric Wong
2016-08-18  2:16 Eric Wong
2016-06-26  3:46 Eric Wong
2015-12-22  0:15 Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190511225513.20820-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=spew@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).