From 5027b5fad0aa4a448e53eeba4027328dd528c918 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 21 Jan 2017 11:34:31 +0000 Subject: repobrowse: simplify git log parsing implementation Based on what was done for the Atom feed, this will allow us to simplify state management through metaprogramming and avoid placeholder characters ('D' for decoration) for empty fields. --- MANIFEST | 1 + lib/PublicInbox/RepobrowseGitLog.pm | 150 +++++++++++++++--------------------- t/repobrowse_git_log.t | 19 +++++ 3 files changed, 81 insertions(+), 89 deletions(-) create mode 100644 t/repobrowse_git_log.t diff --git a/MANIFEST b/MANIFEST index 789ed68c..29b98e90 100644 --- a/MANIFEST +++ b/MANIFEST @@ -183,6 +183,7 @@ t/repobrowse_git.t t/repobrowse_git_atom.t t/repobrowse_git_commit.t t/repobrowse_git_httpd.t +t/repobrowse_git_log.t t/repobrowse_git_plain.t t/repobrowse_git_snapshot.t t/repobrowse_git_tree.t diff --git a/lib/PublicInbox/RepobrowseGitLog.pm b/lib/PublicInbox/RepobrowseGitLog.pm index e62486ba..21c23fd3 100644 --- a/lib/PublicInbox/RepobrowseGitLog.pm +++ b/lib/PublicInbox/RepobrowseGitLog.pm @@ -10,8 +10,9 @@ use base qw(PublicInbox::RepobrowseBase); use PublicInbox::RepobrowseGit qw(git_dec_links git_commit_title); use PublicInbox::Qspawn; # cannot rely on --date=format-local:... yet, it is too new (September 2015) -my $LOG_FMT = '--pretty=tformat:'. - join('%x00', qw(%h %p %s D%D %ai a%an b%b), '', ''); +use constant STATES => qw(h p D ai an s b); +use constant STATE_BODY => (scalar(STATES) - 1); +my $LOG_FMT = '--pretty=tformat:'. join('%n', map { "%$_" } STATES).'%x00'; sub parent_links { if (@_ == 1) { # typical, single-parent commit @@ -24,11 +25,33 @@ sub parent_links { } } +sub flush_log_hdr ($$$) { + my ($req, $dst, $hdr) = @_; + my $rel = $req->{relcmd}; + my $seen = $req->{seen}; + $$dst .= '
' if scalar keys %$seen;
+	my $id = $hdr->{h};
+	$seen->{$id} = 1;
+	$$dst .= qq();
+	$$dst .= utf8_html($hdr->{'s'}); # FIXME may still OOM
+	$$dst .= '';
+	my $D = $hdr->{D}; # FIXME: thousands of decorations may OOM us
+	if ($D ne '') {
+		$$dst .= ' (' . join(', ', git_dec_links($rel, $D)) . ')';
+	}
+	my @p = split(/ /, $hdr->{p});
+	push @{$req->{parents}}, @p;
+	my $plinks = parent_links(@p);
+	$$dst .= "\n- ";
+	$$dst .= utf8_html($hdr->{an});
+	$$dst .= " @ $hdr->{ai}\n  commit $id$plinks\n";
+	undef
+}
+
 sub git_log_sed_end ($$) {
-	my $req = $_[0];
-	my $dst = delete $req->{lhtml} || '';
-	$dst .= utf8_html($_[1]); # existing buffer
-	$dst .= '

';
+	my ($req, $dst) = @_;
+	$$dst .= '
';
 	my $m = '';
 	my $np = 0;
 	my $seen = $req->{seen};
@@ -43,106 +66,55 @@ sub git_log_sed_end ($$) {
 		$m .= qq($s);
 	}
 	if ($np == 0) {
-		$dst .= "No commits follow";
+		$$dst .= "No commits follow";
 	} elsif ($np > 1) {
-		$dst .= "Unseen parent commits to follow (multiple choice):\n";
+		$$dst .= "Unseen parent commits to follow (multiple choice):\n";
 	} else {
-		$dst .= "Next parent to follow:\n";
+		$$dst .= "Next parent to follow:\n";
 	}
-	$dst .= $m;
-	$dst .= '
'; + $$dst .= $m; + $$dst .= '
'; } sub git_log_sed ($$) { my ($self, $req) = @_; my $buf = ''; - my $state = 'h'; - my %acache; - my $rel = $req->{relcmd}; - my $seen = $req->{seen} = {}; - my $parents = $req->{parents} = []; - my ($plinks, $id, $ai); + my $state = 0; + $req->{seen} = {}; + $req->{parents} = []; + my $hdr = {}; sub { my $dst; # $_[0] == scalar buffer, undef means EOF from "git log" - return git_log_sed_end($req, $buf) unless defined $_[0]; $dst = delete $req->{lhtml} || ''; my @tmp; - $buf .= $_[0]; - @tmp = split(/\0/, $buf, -1); - $buf = @tmp ? pop(@tmp) : ''; + if (defined $_[0]) { + $buf .= $_[0]; + @tmp = split(/\n/, $buf, -1); + $buf = @tmp ? pop(@tmp) : ''; + } else { + @tmp = split(/\n/, $buf, -1); + $buf = undef; + } - while (@tmp) { - if ($state eq 'b') { - my $bb = shift @tmp; - $state = 'B' if $bb =~ s/\Ab/\n/; - my @lines = split(/\n/, $bb); - $bb = utf8_html(pop @lines); - $dst .= utf8_html($_)."\n" for @lines; - $dst .= $bb; - } elsif ($state eq 'B') { - my $bb = shift @tmp; - if ($bb eq '') { - $state = 'BB'; - } else { - my @lines = split(/\n/, $bb); - $bb = undef; - my $last = utf8_html(pop @lines); - $dst .= utf8_html($_)."\n" for @lines; - $dst .= $last; - } - } elsif ($state eq 'BB') { - if ($tmp[0] =~ s/\A\n//s) { - $state = 'h'; - } else { - @tmp = (); - warn 'Bad state BB in log parser: ', - $req->{-debug}; - } - } elsif ($state eq 'h') { - if (scalar keys %$seen) { - $dst .= '
';
+		foreach my $l (@tmp) {
+			if ($state != STATE_BODY) {
+				$hdr->{((STATES)[$state])} = $l;
+				if (++$state == STATE_BODY) {
+					flush_log_hdr($req, \$dst, $hdr);
+					$hdr = {};
 				}
-				$id = shift @tmp;
-				$seen->{$id} = 1;
-				$state = 'p'
-			} elsif ($state eq 'p') {
-				my @p = split(/ /, shift @tmp);
-				push @$parents, @p;
-				$plinks = parent_links(@p);
-				$state = 's'
-			} elsif ($state eq 's') {
-				# FIXME: excessively long subjects OOM us
-				my $s = shift @tmp;
-				$dst .= qq();
-				$dst .= utf8_html($s);
-				$dst .= '';
-				$state = 'D'
-			} elsif ($state eq 'D') {
-				# FIXME: thousands of decorations may OOM us
-				my $D = shift @tmp;
-				if ($D =~ /\AD(.+)/) {
-					$dst .= ' (';
-					$dst .= join(', ',
-						git_dec_links($rel, $1));
-					$dst .= ')';
-				}
-				$state = 'ai';
-			} elsif ($state eq 'ai') {
-				$ai = shift @tmp;
-				$state = 'an';
-			} elsif ($state eq 'an') {
-				my $an = shift @tmp;
-				$an =~ s/\Aa// or
-					die "missing 'a' from author: $an";
-				my $ah = $acache{$an} ||= utf8_html($an);
-				$dst .= "\n- $ah @ $ai\n  commit $id$plinks\n";
-				$id = $plinks = $ai = '';
-				$state = 'b';
+				next;
+			}
+			if ($l eq "\0") {
+				$dst .= qq(
); + $state = 0; + } else { + $dst .= "\n"; + $dst .= utf8_html($l); } } - + git_log_sed_end($req, \$dst) unless defined $buf; $dst; }; } diff --git a/t/repobrowse_git_log.t b/t/repobrowse_git_log.t new file mode 100644 index 00000000..86338698 --- /dev/null +++ b/t/repobrowse_git_log.t @@ -0,0 +1,19 @@ +# Copyright (C) 2017 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +my $test = require './t/repobrowse_common_git.perl'; +use Test::More; + +test_psgi($test->{app}, sub { + my ($cb) = @_; + my $req = 'http://example.com/test.git/log'; + my $res = $cb->(GET($req)); + is($res->code, 200, 'got 200'); + is($res->header('Content-Type'), 'text/html', + 'got correct Content-Type'); + my $body = dechunk($res); + like($body, qr!!, 'valid HTML :)'); +}); + +done_testing(); -- cgit v1.2.3-24-ge0c7