From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] WIP
Date: Sat, 11 May 2019 22:55:13 +0000 [thread overview]
Message-ID: <20190511225513.20820-1-e@80x24.org> (raw)
---
lib/PublicInbox/ViewDiff.pm | 367 +++++++++++++++++++++++++++++++++++-
lib/PublicInbox/ViewVCS.pm | 19 +-
2 files changed, 367 insertions(+), 19 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 85b5314..6d732dd 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -10,10 +10,11 @@ package PublicInbox::ViewDiff;
use strict;
use warnings;
use base qw(Exporter);
-our @EXPORT_OK = qw(flush_diff);
+our @EXPORT_OK = qw(flush_diff); # for emails, OO API is for git(1) output
use URI::Escape qw(uri_escape_utf8);
use PublicInbox::Hval qw(ascii_html to_attr from_attr);
use PublicInbox::Git qw(git_unquote);
+use PublicInbox::WwwStream;
# keep track of state so we can avoid redundant HTML tags for
# identically-classed lines
@@ -41,6 +42,17 @@ package PublicInbox::ViewDiff;
my $PATH_A = '"?a/.+|/dev/null';
my $PATH_B = '"?b/.+|/dev/null';
+# "--pretty=format:" argument used by commit_cmd.  The placeholders are
+# joined by "%n" (newline) so each field lands on its own line, in order:
+# commit hash, subject, author, author date, committer, committer date,
+# abbreviated tree hash, abbreviated parent hashes, ref names, and finally
+# the body followed by a NUL byte ("%x00").
+my $CMT_FMT = '--pretty=format:'.join('%n',
+ '%H', '%s', '%an <%ae>', '%ai', '%cn <%ce>', '%ci',
+ '%t', '%p', '%D', '%b%x00');
+
+# explanatory text for merge commits
+# (presumably stored in {mhelp} by a caller outside this view -- TODO confirm)
+sub CC_EMPTY () { " This is a merge, and the combined diff is empty.\n" }
+sub CC_MERGE () { " This is a merge, showing combined diff:\n\n" }
+
+# used for "git show" (on commits) and "git diff"
+# -z and --numstat yield the NUL-delimited "added<TAB>deleted<TAB>path"
+# records which git_diff_sed_stat splits and parses; -p adds the patch text
+# which git_diff_sed_lines parses line-by-line.
+my @DIFF_OPT = qw(-z --numstat -p --encoding=UTF-8 -C -B -D
+ --no-color --no-abbrev);
+
sub to_html ($$) {
$_[0]->linkify_1($_[1]);
$_[0]->linkify_2(ascii_html($_[1]));
@@ -143,6 +155,16 @@ ($$$$$)
undef
}
+# Build the query string naming the path(s) of one diffed file:
+# always "?b=<escaped $pb>", followed by "&a=<escaped $pa>" only
+# when the pre-image path differs from the post-image path.
+sub dquery ($$) {
+	my ($pa, $pb) = @_;
+	my @parts = ('?b=' . uri_escape_utf8($pb, UNSAFE));
+	unless ($pa eq $pb) {
+		push @parts, '&a=' . uri_escape_utf8($pa, UNSAFE);
+	}
+	join('', @parts);
+}
+
+# API for emails. public-inbox assumes any email fits entirely in memory
+# (because we rely on Email::MIME anyways).
+# This interface relies on that assumption.
sub flush_diff ($$$) {
my ($dst, $ctx, $linkify) = @_;
my $diff = $ctx->{-diff};
@@ -174,13 +196,7 @@ ($$$)
}
$pa = (split('/', git_unquote($pa), 2))[1];
$pb = (split('/', git_unquote($pb), 2))[1];
- $dctx = {
- Q => "?b=".uri_escape_utf8($pb, UNSAFE),
- };
- if ($pa ne $pb) {
- $dctx->{Q} .= '&a='.
- uri_escape_utf8($pa, UNSAFE);
- }
+ $dctx = { Q => dquery($pa, $pb) };
anchor1($dst, $ctx, $linkify, $pb, $s) and next;
$$dst .= to_html($linkify, $s);
} elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
@@ -196,7 +212,7 @@ ($$$)
$$dst .= to_html($linkify, $s);
} elsif ($s =~ s/^@@ (\S+) (\S+) @@//) {
$$dst .= '</span>' if $state2class[$state];
- $$dst .= qq(<span\nclass="hunk">);
+ $$dst .= qq(<span\nclass="hunk">); # XHTML
$$dst .= diff_hunk($dctx, $spfx, $1, $2);
$$dst .= '</span>';
$state = DSTATE_CTX;
@@ -235,4 +251,337 @@ ($$$)
undef;
}
+# OO interface
+# Record the space-joined form of the git sub-command in
+# $self->{git_cmd} (git_diff_sed shows it in the page) and return
+# the very same array ref so callers can splice it into a command line.
+sub _cmd ($$) {
+	my ($self, $argv) = @_;
+	my $printable = join(' ', @{$argv});
+	$self->{git_cmd} = $printable;
+	$argv;
+}
+
+# Returns the "git diff" argument list (without the leading "git")
+# for comparing $oid_a against $oid_b; also records it via _cmd.
+sub diff_cmd ($$$) {
+	my ($self, $oid_a, $oid_b) = @_;
+	my @argv = ('diff', @DIFF_OPT, $oid_a, $oid_b, '--');
+	_cmd($self, \@argv);
+}
+
+# Returns the "git show -c" argument list (without the leading "git")
+# for displaying commit $oid with $CMT_FMT; also records it via _cmd.
+sub commit_cmd ($$) {
+	my ($self, $oid) = @_;
+	my @argv = ('show', '-c', @DIFF_OPT, $CMT_FMT, $oid, '--');
+	_cmd($self, \@argv);
+}
+
+# OO API for parsing output of git-diff(1), git-show(1), etc...
+# We try to do as much as possible by streaming, so we act as
+# a stream editor (e.g. 'sed')
+#
+# The constructor takes no arguments beyond the class name.
+sub new {
+	my ($class) = @_;
+	bless {
+		dstate => DSTATE_INIT,
+		dbuf => '', # unparsed git output carried between calls
+		# counters, all starting at zero:
+		ndiff => 0,
+		nchg => 0,
+		nadd => 0,
+		ndel => 0,
+		# optional fields set elsewhere:
+		# mhelp => merge help
+		# diff_tree => 1 (true if comparing tree-ish)
+	}, $class;
+}
+
+# diffstat links to anchors within the same HTML page
+#
+# Formats one rename entry of the diffstat.  Leading path components
+# shared by $from and $to are shown once, followed by "{old => new}";
+# without a shared prefix the result is simply "old => new".
+# The new name links to the anchor recorded in $self->{anchors}.
+sub git_diffstat_rename ($$$) {
+	my ($self, $from, $to) = @_;
+	my $anchor = to_attr(git_unquote($to));
+	$self->{anchors}->{$anchor} = $to;
+	my @old = split('/', $from);
+	my @new = split('/', $to);
+	my @shared;
+
+	# only show differing path components
+	for (;;) {
+		last unless @new && @old;
+		last if $new[0] ne $old[0];
+		push @shared, shift(@new);
+		shift @old;
+	}
+
+	my $base = @shared ? ascii_html(join('/', @shared)) : undef;
+	my $old_html = ascii_html(join('/', @old));
+	my $new_html = ascii_html(join('/', @new));
+	my $link = qq(<a\nhref="#$anchor">$new_html</a>);
+	return "$old_html => $link" unless @shared;
+	"$base/{$old_html => $link}";
+}
+
+# Parses the NUL-delimited diffstat at the start of the git output.
+#
+# Takes everything buffered in $self->{dbuf}, splits it on NUL and
+# appends one formatted line per changed file to $$dst.  Fields which
+# cannot be handled yet are put back into $self->{dbuf} for the next call.
+# Once the end of the diffstat is detected, {dstate} becomes DSTATE_HEAD
+# and a "N files changed, ..." summary is appended to $$dst.
+# Running totals are kept in $self->{nchg}, {nadd} and {ndel}.
+sub git_diff_sed_stat ($$) {
+ my ($self, $dst) = @_;
+ my @stat = split(/\0/, delete $self->{dbuf}, -1);
+ my $end; # end-of-stat
+ # references, so the counters in $self are updated in place
+ my $nchg = \($self->{nchg});
+ my $nadd = \($self->{nadd});
+ my $ndel = \($self->{ndel});
+ # one-time cleanup of the very first field
+ if (!$self->{dstat_started}) {
+ $self->{dstat_started} = 1;
+
+ # merges start with an extra '\0' before the diffstat
+ # non-merge commits start with an extra '\n', instead
+ if ($self->{mhelp}) {
+ if ($stat[0] eq '') {
+ shift @stat;
+ } else {
+ warn
+'initial merge diffstat line was not empty';
+ }
+ } else {
+ # for commits, only (not diff-tree)
+ $stat[0] =~ s/\A\n//s;
+ }
+ }
+ while (defined(my $l = shift @stat)) {
+ if ($l eq '') {
+ # an empty field followed by something starting with "d"
+ # is treated as the end of the diffstat
+ $end = 1 if $stat[0] && $stat[0] =~ /\Ad/; # "diff --"
+ last;
+ } elsif ($l =~ /\Adiff /) {
+ # the patch text has begun, leave this field for the line parser
+ unshift @stat, $l;
+ $end = 1;
+ last;
+ }
+ utf8::upgrade($l);
+ # added<TAB>deleted<TAB>name; anything else is skipped
+ $l =~ /\A(\S+)\t+(\S+)\t+(.*)/ or next;
+ my ($add, $del, $fn) = ($1, $2, $3);
+ if ($fn ne '') { # normal modification
+ # TODO: discard diffs if they are too big
+ # gigantic changes with many files may still OOM us
+ my $anchor = to_attr(git_unquote($fn));
+ $self->{anchors}->{$anchor} = $fn;
+ $l = qq(<a\nhref="#$anchor">).ascii_html($fn).'</a>';
+ } else { # rename
+ # an empty name means the old and new names follow
+ # as the next two NUL-delimited fields
+ # incomplete...
+ if (scalar(@stat) < 2) {
+ # put the record back and wait for more data
+ unshift @stat, $l;
+ last;
+ }
+ my $from = shift @stat;
+ my $to = shift @stat;
+ utf8::upgrade($from);
+ utf8::upgrade($to);
+ $l = git_diffstat_rename($self, $from, $to);
+ }
+
+ # text changes show numerically, Binary does not
+ if ($add =~ /\A\d+\z/) {
+ $$nadd += $add;
+ $$ndel += $del;
+ $add = "+$add";
+ $del = "-$del";
+ }
+ ++$$nchg;
+ # deletions right-aligned, additions left-aligned around the "/"
+ my $num = sprintf('% 6s/%-6s', $del, $add);
+ $$dst .= " $num\t$l\n";
+ }
+
+ # the rest of the diff:
+ $self->{dbuf} = join("\0", @stat);
+ # diffstat not finished yet, the summary has to wait for more input
+ return unless $end;
+
+ $self->{dstate} = DSTATE_HEAD;
+ $$dst .= "\n $$nchg ";
+ $$dst .= $$nchg == 1 ? 'file changed, ' : 'files changed, ';
+ $$dst .= $$nadd;
+ $$dst .= $$nadd == 1 ? ' insertion(+), ' : ' insertions(+), ';
+ $$dst .= $$ndel;
+ $$dst .= $$ndel == 1 ? " deletion(-)\n\n" : " deletions(-)\n\n";
+}
+
+# index abcdef89..01234567 100644
+#
+# Remembers both blob OIDs in $self (git_diff_ab_hunk needs them) and
+# returns the HTML for the "index" line.  The OID range only becomes
+# a link when $self->{diff_tree} is true.
+sub git_diff_ab_index ($$$$) {
+	my ($self, $oid_a, $oid_b, $mode) = @_;
+	@$self{qw(oid_a oid_b)} = ($oid_a, $oid_b);
+	my $shown = join('..', $oid_a, $oid_b);
+	$shown = qq(<a\nhref="../../$shown/s/$self->{Q}">$shown</a>)
+		if $self->{diff_tree};
+	join('', 'index ', $shown, ascii_html($mode));
+}
+
+# diff --git a/foo.c b/bar.c
+#
+# Returns the HTML for a regular "diff --git" header line.  Switches
+# to DSTATE_HEAD via to_state if needed, sets $self->{Q} when
+# $self->{diff_tree} is true, and removes the anchor of the new path
+# from $self->{anchors}.
+sub git_diff_ab_hdr ($$$) {
+	my ($self, $pa, $pb) = @_;
+	my $rv = '';
+	to_state(\$rv, $self->{dstate}, DSTATE_HEAD)
+		unless $self->{dstate} == DSTATE_HEAD;
+
+	# drop the first path component (the "a/" and "b/" prefixes)
+	my $old = (split('/', git_unquote($pa), 2))[1];
+	my $new = (split('/', git_unquote($pb), 2))[1];
+	if ($self->{diff_tree}) {
+		$self->{Q} = dquery($old, $new);
+	}
+	my $id = to_attr($new);
+	delete $self->{anchors}->{$id};
+
+	# not wasting bandwidth on links here
+	# links in hunk headers are far more useful with line offsets
+	$rv .= join('', qq(<a\nid="$id">diff</a> --git ),
+			ascii_html($old), ' ', ascii_html($new));
+	$rv;
+}
+
+# diff (--cc|--combined)
+#
+# Returns the HTML for a combined-diff header line.  $combined is the
+# option name without dashes ("cc" or "combined").  Sets $self->{Q}
+# and removes the anchor of $path from $self->{anchors}.
+sub git_diff_cc_hdr {
+	my ($self, $combined, $path) = @_;
+	my $name = git_unquote($path);
+	$self->{Q} = dquery($name, $name);
+	my $id = to_attr($name);
+	delete $self->{anchors}->{$id};
+	join('', qq(<a\nid="$id">diff</a> --$combined ), ascii_html($name));
+}
+
+# Turns one hunk offset (e.g. "-1,2" or "+3,4") into a link to the blob
+# $oid with query string $qs, using the starting line number as "#nN".
+# An offset starting at line 0 is returned unlinked; an offset without
+# a recognizable line number is linked without a fragment.
+sub offset_link ($$$) {
+	my ($qs, $oid, $offset) = @_;
+	my $frag = '';
+	if ($offset =~ /\A[\-\+](\d+)/) {
+		# new or deleted file, don't link it
+		return $offset if $1 == 0;
+		$frag = "#n$1";
+	}
+	qq(<a href="../../$oid/s/$qs$frag">$offset</a>);
+}
+
+# @@ -1,2 +3,4 @@ (regular diff)
+#
+# Returns the HTML for a regular hunk header: both offsets are linked
+# (see offset_link) to the blobs remembered by git_diff_ab_index and
+# the trailing function context is HTML-escaped.
+sub git_diff_ab_hunk ($$$$) {
+	my ($self, $ca, $cb, $func_ctx) = @_;
+	my $qs = $self->{Q};
+	my $link_a = offset_link($qs, $self->{oid_a}, $ca);
+	my $link_b = offset_link($qs, $self->{oid_b}, $cb);
+
+	join('', qq(<span\nclass=hunk>@@ ), # HTML
+		$link_a, ' ', $link_b,
+		' @@', ascii_html($func_ctx), '</span>');
+}
+
+# index abcdef09,01234567..76543210
+#
+# Remembers the comma-separated OIDs of $before plus $last, in that
+# order, in $self->{oids_cc} (used by git_diff_cc_hunk) and returns
+# the "index" line with $end HTML-escaped.
+sub git_diff_cc_index {
+	my ($self, $before, $last, $end) = @_;
+	my @oids = split(',', $before);
+	push @oids, $last;
+	$self->{oids_cc} = \@oids;
+
+	# not wasting bandwidth on links here, yet
+	# links in hunk headers are far more useful with line offsets
+	join('', 'index ', $before, '..', $last, ascii_html($end));
+}
+
+# @@@ -1,2 -3,4 +5,6 @@@ (combined diff)
+#
+# Returns the HTML for a combined-diff hunk header.
+#   $at_signs - the run of "@" characters which opened the hunk header
+#   $offs     - the space-separated offsets between the "@" markers
+#   $func_ctx - anything after the closing marker (HTML-escaped here)
+# The Nth offset is linked against the Nth OID remembered by
+# git_diff_cc_index in $self->{oids_cc}.
+sub git_diff_cc_hunk ($$$$) {
+	my ($self, $at_signs, $offs, $func_ctx) = @_;
+	my $oids = $self->{oids_cc};
+	my $qs = $self->{Q};
+	my $i = 0;
+
+	# build the links first: an unparenthesized map would also swallow
+	# the closing $at_signs and turn it into a link to a missing OID
+	my @links = map {
+		offset_link($qs, $oids->[$i++], $_);
+	} split(' ', $offs);
+
+	# no literal "@@ " prefix here, $at_signs already is the marker
+	qq(<span\nclass=hunk>) . # HTML
+		join(' ', $at_signs, @links, $at_signs) .
+		ascii_html($func_ctx) . '</span>';
+}
+
+# the rest of the diff (beyond diffstat)
+#
+# Consumes every complete line buffered in $self->{dbuf} and appends
+# its HTML form, followed by a newline, to $$dst.  The last (possibly
+# unterminated) line stays in $self->{dbuf} for the next call or for
+# git_diff_sed_close.  $self->{dstate} tracks the class of the current
+# line and $self->{ndiff} counts the lines handled.
+sub git_diff_sed_lines ($$) {
+	my ($self, $dst) = @_;
+
+	my @dlines = split(/\n/, delete $self->{dbuf}, -1);
+
+	# don't touch the last line, it may not be terminated
+	# (an empty buffer yields no fields at all, keep {dbuf} defined)
+	$self->{dbuf} = @dlines ? pop(@dlines) : '';
+
+	if (my $help = delete $self->{mhelp}) {
+		$$dst .= $help; # CC_MERGE
+	}
+
+	# reminder: this is stricter than similar code in flush_diff,
+	# this is for git output (including --cc/--combined) we generate,
+	# while flush_diff parses mail
+	my $ndiff = \($self->{ndiff});
+	my $linkify = PublicInbox::Linkify->new;
+	while (defined(my $s = shift @dlines)) {
+		utf8::upgrade($s);
+		if ($s =~ m{\Adiff --git ("?a/.+) ("?b/.+)\z}) { # regular
+			$$dst .= git_diff_ab_hdr($self, $1, $2);
+		} elsif ($s =~ m{\Adiff --(cc|combined) (.+)\z}) {
+			$$dst .= git_diff_cc_hdr($self, $1, $2);
+		} elsif ($s =~ /\Aindex ($OID_BLOB)\.\.($OID_BLOB)(.*)\z/o) {
+			# regular diff
+			$$dst .= git_diff_ab_index($self, $1, $2, $3);
+		} elsif ($s =~
+			/\Aindex ($OID_BLOB,[^\.]+)\.\.($OID_BLOB)(.*)\z/o) {
+			# --cc diff
+			$$dst .= git_diff_cc_index($self, $1, $2, $3);
+		} elsif ($s =~ /\A@@ (\S+) (\S+) @@(.*)\z/) { # regular
+			$$dst .= '</span>' if $state2class[$self->{dstate}];
+			$$dst .= git_diff_ab_hunk($self, $1, $2, $3);
+			$self->{dstate} = DSTATE_CTX;
+		} elsif ($s =~ /\A(@@@+) (\S+.*\S+) @@@+(.*)\z/) { # --cc
+			$$dst .= '</span>' if $state2class[$self->{dstate}];
+			$$dst .= git_diff_cc_hunk($self, $1, $2, $3);
+			# the span was closed above, so the state must follow,
+			# exactly as for regular hunks
+			$self->{dstate} = DSTATE_CTX;
+		} elsif ($s =~ /^ /) {
+			# works for common cases, but not weird/long filenames
+			# NOTE(review): git_diff_sed_run only calls us once
+			# dstate is beyond DSTATE_STAT, so this first branch
+			# looks unreachable; it also hands $self to anchor0
+			# where flush_diff-style helpers take $ctx -- confirm
+			if ($self->{dstate} == DSTATE_STAT &&
+					$s =~ /^ (.+)( +\| .*\z)/s) {
+				# $dst already is a scalar ref, pass it as-is
+				anchor0($dst, $self, $linkify, $1, $2) and next;
+			} elsif ($state2class[$self->{dstate}]) {
+				to_state($dst, $self->{dstate}, DSTATE_CTX);
+			}
+			$$dst .= to_html($linkify, $s);
+		} elsif ($s =~ m!^--- ! || $s =~ m!^\+{3} !) {
+			# color only (no oid link) if missing dctx->{oid_*}
+			$self->{dstate} <= DSTATE_STAT and
+				to_state($dst, $self->{dstate}, DSTATE_HEAD);
+			$$dst .= to_html($linkify, $s);
+		} elsif ($s =~ /^\+/) {
+			if ($self->{dstate} != DSTATE_ADD) {
+				to_state($dst, $self->{dstate}, DSTATE_ADD);
+			}
+			$$dst .= to_html($linkify, $s);
+		} elsif ($s =~ /^-/) {
+			if ($self->{dstate} != DSTATE_DEL &&
+					$self->{dstate} > DSTATE_STAT) {
+				to_state($dst, $self->{dstate}, DSTATE_DEL);
+			}
+			$$dst .= to_html($linkify, $s);
+		# the following header lines get no special treatment, yet:
+		} elsif ($s =~ /^(?:dis)?similarity index / ||
+				$s =~ /^(?:old|new) mode/ ||
+				$s =~ /^(?:deleted|new) file mode/ ||
+				$s =~ /^(?:copy|rename) (?:from|to) /) {
+			$$dst .= to_html($linkify, $s);
+		} else {
+			$$dst .= to_html($linkify, $s);
+		}
+		# split() removed the line terminator, put it back so the
+		# <pre> output keeps one diff line per line
+		$$dst .= "\n";
+		++$$ndiff;
+	}
+}
+
+# Processes whatever is buffered in $self->{dbuf}: the diffstat parser
+# runs while in DSTATE_STAT, and the line parser runs once the state is
+# beyond DSTATE_STAT.  The second check is deliberately made after the
+# first, since the diffstat parser may advance the state in this call.
+# Always returns undef.
+sub git_diff_sed_run ($$) {
+	my ($self, $dst) = @_;
+	if ($self->{dstate} == DSTATE_STAT) {
+		git_diff_sed_stat($self, $dst);
+	}
+	if ($self->{dstate} > DSTATE_STAT) {
+		git_diff_sed_lines($self, $dst);
+	}
+	undef;
+}
+
+# Called at EOF: flushes whatever is left in $self->{dbuf} into $$dst.
+# Always returns undef.
+sub git_diff_sed_close ($$) {
+	my ($self, $dst) = @_;
+	my $tmp = delete $self->{dbuf};
+	if (defined $tmp && $tmp ne '') {
+		utf8::upgrade($tmp);
+		# the leftover is raw git output which went through none of
+		# the parsers, it must be escaped before it lands in HTML
+		$$dst .= ascii_html($tmp);
+	}
+	# do not leave the <span> of the final line state open
+	# (same check as used before emitting a hunk header)
+	$$dst .= '</span>' if $state2class[$self->{dstate}];
+	undef;
+}
+
+# Returns a filter callback which converts streamed git output to HTML.
+#
+# The callback takes one argument: a scalar buffer of git output, or
+# undef at EOF.  It returns the HTML produced so far; the first call is
+# prefixed with the page top and the EOF call is suffixed with the page
+# end.  Calling this also sets $ctx->{-html_tip} and puts $self into
+# DSTATE_STAT.
+sub git_diff_sed {
+	my ($self, $ctx) = @_;
+	my $ws = { ctx => $ctx };
+	my @first = PublicInbox::WwwStream::html_top($ws) . '<pre>';
+	# NOTE(review): -html_tip is assigned after html_top() already ran;
+	# confirm html_top does not need it earlier
+	# git_cmd may contain markup-like text (the commit format has
+	# "<%ae>" in it), so it must be escaped
+	$ctx->{-html_tip} = '<pre>Output of: git ' .
+		ascii_html($self->{git_cmd}) . "\n";
+	$self->{dstate} = DSTATE_STAT;
+
+	# this filters for $fh->write or $body->getline (see Qspawn)
+	sub {
+		my $dst = shift(@first) || '';
+		if (defined $_[0]) { # $_[0] == scalar buffer
+			$self->{dbuf} .= $_[0];
+			git_diff_sed_run($self, \$dst);
+		} else { # undef means EOF from "git show", flush the last bit
+			git_diff_sed_close($self, \$dst);
+			$dst .= '</pre>'.PublicInbox::WwwStream::html_end($ws);
+		}
+		$dst;
+	}
+}
+
1;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index c693fcf..2ba09a8 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -20,7 +20,7 @@ package PublicInbox::ViewVCS;
use PublicInbox::SolverGit;
use PublicInbox::WwwStream;
use PublicInbox::Linkify;
-use PublicInbox::ViewDiff qw(flush_diff);
+use PublicInbox::ViewDiff;
use PublicInbox::Hval qw(ascii_html to_filename);
my $hl = eval {
require PublicInbox::HlMod;
@@ -181,23 +181,22 @@ ($$$$$$)
return html_page($ctx, 500, \'seek error');
}
$log = do { local $/; <$log> };
- warn "log: $log\n";
+ my $vdiff = PublicInbox::ViewDiff->new;
my $git_b = $res_b->[0];
- my $cmd = ['git', "--git-dir=$git_b->{git_dir}", 'diff',
- $res_a->[1], $res_b->[1] ];
+ my $gcmd = $vdiff->diff_cmd($res_a->[1], $res_b->[1]);
+ my $cmd = ['git', "--git-dir=$git_b->{git_dir}", @$gcmd ];
my $qsp = PublicInbox::Qspawn->new($cmd);
my $env = $ctx->{env};
$env->{'qspawn.wcb'} = delete $ctx->{-wcb};
- $qsp->psgi_return($env, undef, sub {
+ $qsp->psgi_return($env, undef, sub { # parse header
my ($r, $bref) = @_;
if (!defined $r) { # error
html_page($ctx, 500, $log);
- } elsif (index($$bref, "\0") >= 0) {
- my $ct = 'application/octet-stream';
- [200, ['Content-Type', $ct ] ];
+ } elsif ($r == 0) {
+ PublicInbox::WwwStream::r($ctx, 200, 'empty diff');
} else {
- my $ct = 'text/plain; charset=UTF-8';
- [200, ['Content-Type', $ct] ];
+ $env->{'qspawn.filter'} = $vdiff->git_diff_sed($ctx);
+ PublicInbox::WwwStream::r($ctx, 200);
}
});
}
--
EW
next reply other threads:[~2019-05-11 22:55 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-11 22:55 Eric Wong [this message]
-- strict thread matches above, loose matches on Subject: below --
2021-10-27 20:16 [PATCH] wip Eric Wong
2021-06-05 19:58 Eric Wong
2021-04-05 7:42 Eric Wong
2021-03-08 7:11 Eric Wong
2021-01-21 4:24 [PATCH] WIP Eric Wong
2021-01-03 22:57 [PATCH] wip Eric Wong
2020-12-27 11:36 [PATCH] WIP Eric Wong
2020-11-15 7:35 [PATCH] wip Eric Wong
2020-04-23 4:27 Eric Wong
2020-04-20 7:14 Eric Wong
2020-01-13 9:24 [PATCH] WIP Eric Wong
2019-01-02 9:21 [PATCH] wip Eric Wong
2018-07-06 21:31 Eric Wong
2018-06-24 11:55 Eric Wong
2018-06-24 8:39 Eric Wong
2017-07-15 1:42 [PATCH] WIP Eric Wong
2017-04-12 20:17 [PATCH] wip Eric Wong
2017-04-05 18:40 Eric Wong
2016-08-23 20:07 Eric Wong
2016-08-18 2:16 Eric Wong
2016-06-26 3:46 Eric Wong
2015-12-22 0:15 Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190511225513.20820-1-e@80x24.org \
--to=e@80x24.org \
--cc=spew@80x24.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).