From 4ffad9b0a60f40ee9717e22000c233fcba30b30d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 21 Feb 2023 11:17:58 +0000 Subject: viewvcs: handle non-UTF-8 commit message Back in the old days, git didn't store commit encodings and allowed messages in various encodings to enter history. Assuming such a commit is UTF-8 trips up s/// operations on buffers read with the `:utf8' PerlIO layer. So clear Perl's internal UTF-8 flag if we end up with something which isn't valid UTF-8. An example is commit 7eb93c89651c47c8095d476251f2e4314656b292 in git.git ([PATCH] Simplify git script, 2005-09-07). --- lib/PublicInbox/ViewVCS.pm | 4 +++- t/solver_git.t | 40 +++++++++++++++++++++++++++++++++++++--- xt/solver.t | 1 + 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index 5fd46610..de8600ee 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -157,9 +157,11 @@ sub show_commit_start { # ->psgi_qx callback } my $patchid = (split(/ /, $$bref))[0]; # ignore commit $ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid; - open my $fh, '<:utf8', "$ctx->{-tmp}/h" or + open my $fh, '<', "$ctx->{-tmp}/h" or die "open $ctx->{-tmp}/h: $!"; chop(my $buf = do { local $/ = "\0"; <$fh> }); + utf8::decode($buf); + utf8::valid($buf) or utf8::encode($buf); # non-UTF-8 commits exist chomp $buf; my ($P, $p); ($P, $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9); diff --git a/t/solver_git.t b/t/solver_git.t index c65d9785..e8d9feb9 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -218,14 +218,13 @@ SKIP: { my %oid; # (small|big) => OID my $lk = bless { lock_path => $l }, 'PublicInbox::Lock'; my $acq = $lk->lock_for_scope; - my $stamp = "$binfoo/stamp"; + my $stamp = "$binfoo/stamp-"; if (open my $fh, '<', $stamp) { %oid = map { chomp; split(/=/, $_) } (<$fh>); } else { PublicInbox::Import::init_bare($binfoo); my $cmd = [ qw(git hash-object -w --stdin) ]; my $env = { GIT_DIR => $binfoo }; - open 
my $fh, '>', "$stamp.$$" or BAIL_OUT; while (my ($label, $size) = each %bin) { pipe(my ($rin, $win)) or BAIL_OUT; my $rout = popen_rd($cmd , $env, { 0 => $rin }); @@ -234,9 +233,33 @@ SKIP: { close $win or BAIL_OUT; chomp(my $x = <$rout>); close $rout or BAIL_OUT "$?"; - print $fh "$label=$x\n" or BAIL_OUT; $oid{$label} = $x; } + + open my $null, '<', '/dev/null' or xbail "open /dev/null: $!"; + my $t = xqx([qw(git mktree)], $env, { 0 => $null }); + xbail "mktree: $?" if $?; + chomp($t); + my $non_utf8 = "K\x{e5}g"; + $env->{GIT_AUTHOR_NAME} = $non_utf8; + $env->{GIT_AUTHOR_EMAIL} = 'e@example.com'; + $env->{GIT_COMMITTER_NAME} = $env->{GIT_AUTHOR_NAME}; + $env->{GIT_COMMITTER_EMAIL} = $env->{GIT_AUTHOR_EMAIL}; + my $in = \"$non_utf8\n\nK\x{e5}g\n"; + my $c = xqx([qw(git commit-tree), $t], $env, { 0 => $in }); + xbail "commit-tree: $?" if $?; + chomp($c); + $oid{'iso-8859-1'} = $c; + + $c = xqx([qw(git commit-tree -p), $c, $t], $env, { 0 => $in }); + xbail "commit-tree: $?" if $?; + chomp($c); + $oid{'8859-parent'} = $c; + + open my $fh, '>', "$stamp.$$" or BAIL_OUT; + while (my ($k, $v) = each %oid) { + print $fh "$k=$v\n" or xbail "print: $!"; + } close $fh or BAIL_OUT; rename("$stamp.$$", $stamp) or BAIL_OUT; } @@ -331,6 +354,17 @@ EOF open STDERR, '>&', $olderr or xbail "open: $!"; is($res->code, 200, 'coderepo summary (binfoo)'); ok(!-s "$tmpdir/stderr.log"); + + $res = $cb->(GET("/binfoo/$oid{'iso-8859-1'}/s/")); + is($res->code, 200, 'ISO-8859-1 commit'); + like($res->content, qr/Kåg/, 'ISO-8859-1 commit message'); + ok(!-s "$tmpdir/stderr.log", 'nothing in stderr'); + + $res = $cb->(GET("/binfoo/$oid{'8859-parent'}/s/")); + is($res->code, 200, 'commit w/ ISO-8859-parent'); + like($res->content, qr/Kåg/, 'ISO-8859-1 commit message'); + ok(!-s "$tmpdir/stderr.log", 'nothing in stderr'); + $res = $cb->(GET('/public-inbox/')); is($res->code, 200, 'coderepo summary (public-inbox)'); diff --git a/xt/solver.t b/xt/solver.t index 1b0af3d8..1f004bf5 100644 --- 
a/xt/solver.t +++ b/xt/solver.t @@ -30,6 +30,7 @@ my $todo = { '96f1c7f/s/', # TODO: b=contrib/completion/git-completion.bash 'b76f2c0/s/?b=po/zh_CN.po', 'c2f3bf071ee90b01f2d629921bb04c4f798f02fa/s/', # tag + '7eb93c89651c47c8095d476251f2e4314656b292/s/', # non-UTF-8 ], }; -- cgit v1.2.3-24-ge0c7