From 402234aa4eaf4732e5e1636099115465ec6e5c15 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 31 Jan 2023 10:31:57 +0000 Subject: www: diff: fix encoding problems when showing diff We need to use the utf8 layer when writing files to be diffed, and utf8::decode the `git diff' output. Furthermore, do the CRLF > LF conversion early to avoid showing CRLF vs LF differences in the diff, since that doesn't matter to MUAs (nor our normal HTML views) --- lib/PublicInbox/MailDiff.pm | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm index a0ecef9f..7511144c 100644 --- a/lib/PublicInbox/MailDiff.pm +++ b/lib/PublicInbox/MailDiff.pm @@ -19,8 +19,10 @@ sub write_part { # Eml->each_part callback my $ct = $part->content_type || 'text/plain'; my ($s, $err) = msg_part_text($part, $ct); my $sfx = defined($s) ? 'txt' : 'bin'; - open my $fh, '>', "$self->{curdir}/$idx.$sfx" or die "open: $!"; - print $fh ($s // $part->body) or die "print $!"; + $s //= $part->body; + $s =~ s/\r+\n/\n/sg; + open my $fh, '>:utf8', "$self->{curdir}/$idx.$sfx" or die "open: $!"; + print $fh $s or die "print $!"; close $fh or die "close $!"; } @@ -66,9 +68,9 @@ sub next_smsg ($) { sub emit_msg_diff { my ($bref, $self) = @_; # bref is `git diff' output # will be escaped to `•' in HTML + utf8::decode($$bref); $self->{ctx}->{ibx}->{obfuscate} and obfuscate_addrs($self->{ctx}->{ibx}, $$bref, "\x{2022}"); - $$bref =~ s/\r+\n/\n/sg; print { $self->{ctx}->{zfh} } '
' if $self->{nr} > 1;
 	flush_diff($self->{ctx}, $bref);
 	next_smsg($self);
-- 
cgit v1.2.3-24-ge0c7