about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r-- lib/PublicInbox/Eml.pm  | 8
-rw-r--r-- lib/PublicInbox/IMAP.pm | 2
-rw-r--r-- lib/PublicInbox/Smsg.pm | 3
-rw-r--r-- t/imapd.t               | 28
-rw-r--r-- t/psgi_search.t         | 7
5 files changed, 41 insertions, 7 deletions
diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm
index 485f637a..8b999e1a 100644
--- a/lib/PublicInbox/Eml.pm
+++ b/lib/PublicInbox/Eml.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Lazy MIME parser, it still slurps the full message but keeps short
@@ -144,6 +144,7 @@ sub header_raw {
         my $re = re_memo($_[1]);
         my @v = (${ $_[0]->{hdr} } =~ /$re/g);
         for (@v) {
+                utf8::decode($_); # SMTPUTF8
                 # for compatibility w/ Email::Simple::Header,
                 s/\s+\z//s;
                 s/\A\s+//s;
@@ -359,14 +360,15 @@ sub header_set {
         $pfx .= ': ';
         my $len = 78 - length($pfx);
         @vals = map {;
+                utf8::encode(my $v = $_); # to bytes, support SMTPUTF8
                 # folding differs from Email::Simple::Header,
                 # we favor tabs for visibility (and space savings :P)
                 if (length($_) >= $len && (/\n[^ \t]/s || !/\n/s)) {
                         local $Text::Wrap::columns = $len;
                         local $Text::Wrap::huge = 'overflow';
-                        $pfx . wrap('', "\t", $_) . $self->{crlf};
+                        $pfx . wrap('', "\t", $v) . $self->{crlf};
                 } else {
-                        $pfx . $_ . $self->{crlf};
+                        $pfx . $v . $self->{crlf};
                 }
         } @vals;
         $$hdr =~ s!$re!shift(@vals) // ''!ge; # replace current headers, first
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index 1f65aa65..37317948 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -426,8 +426,10 @@ sub _esc ($) {
         if (!defined($v)) {
                 'NIL';
         } elsif ($v =~ /[{"\r\n%*\\\[]/) { # literal string
+                utf8::encode($v);
                 '{' . length($v) . "}\r\n" . $v;
         } else { # quoted string
+                utf8::encode($v);
                 qq{"$v"}
         }
 }
diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index 2026c7d9..b132381b 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -99,9 +99,6 @@ sub populate {
                 # to protect git and NNTP clients
                 $val =~ tr/\0\t\n/   /;
 
-                # rare: in case headers have wide chars (not RFC2047-encoded)
-                utf8::decode($val);
-
                 # lower-case fields for read-only stuff
                 $self->{lc($f)} = $val;
 
diff --git a/t/imapd.t b/t/imapd.t
index 3c74aefd..cbd6c1b9 100644
--- a/t/imapd.t
+++ b/t/imapd.t
@@ -534,6 +534,34 @@ SKIP: {
         }
 }
 
+{
+        ok(my $ic = $imap_client->new(%mic_opt), 'logged in');
+        my $mb = "$ibx[0]->{newsgroup}.$first_range";
+        ok($ic->examine($mb), "EXAMINE $mb");
+        my $uidnext = $ic->uidnext($mb); # we'll fetch BODYSTRUCTURE on this
+        my $im = $ibx[0]->importer(0);
+        $im->add(PublicInbox::Eml->new(<<EOF)) or BAIL_OUT;
+Subject: test Ævar
+Message-ID: <smtputf8-delivered-mess\@age>
+From: Ævar Arnfjörð Bjarmason <avarab\@example>
+To: git\@vger.kernel.org
+
+EOF
+        $im->done;
+        my $envl = $ic->get_envelope($uidnext);
+        is($envl->{subject}, 'test Ævar', 'UTF-8 subject');
+        is($envl->{sender}->[0]->{personalname}, 'Ævar Arnfjörð Bjarmason',
+                'UTF-8 sender[0].personalname');
+        SKIP: {
+                skip 'need compress for comparisons', 1 if !$can_compress;
+                ok($ic = $imap_client->new(%mic_opt), 'uncompressed logged in');
+                ok($ic && $ic->compress, 'compress enabled');
+                ok($ic->examine($mb), "EXAMINE $mb");
+                my $raw = $ic->get_envelope($uidnext);
+                is_deeply($envl, $raw, 'raw and compressed match');
+        }
+}
+
 $td->kill;
 $td->join;
 is($?, 0, 'no error in exited process') if !$ENV{TEST_KILL_IMAPD};
diff --git a/t/psgi_search.t b/t/psgi_search.t
index 3da93eda..8868f67e 100644
--- a/t/psgi_search.t
+++ b/t/psgi_search.t
@@ -1,5 +1,5 @@
 #!perl -w
-# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
 use v5.10.1;
@@ -103,6 +103,11 @@ test_psgi(sub { $www->call(@_) }, sub {
                 like($res->content, $mid_re, 'found mid in response');
                 chop($digits);
         }
+        $res = $cb->(GET("/test/$mid/"));
+        $html = $res->content;
+        like($html, qr/\bFrom: &#198;var /,
+                "displayed Ævar's name properly in permalink From:");
+        unlike($html, qr/&#195;/, 'no raw octets in permalink HTML');
 
         $res = $cb->(GET('/test/'));
         $html = $res->content;