about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2019-06-04 02:04:21 +0000
committer Eric Wong <e@80x24.org> 2019-06-04 10:04:57 +0000
commit 1735e5c2cf87b28b096ad91008bdb764d853b26d (patch)
tree 9cdf3ae883b9df1309bee4e4cd22cd1d5bdf69d5
parent b77c87a6fce05c4f2048aa0a73fde7b25a2b0002 (diff)
download public-inbox-1735e5c2cf87b28b096ad91008bdb764d853b26d.tar.gz
The "\w" character class in Perl matches any word character
in the Unicode database, not just ASCII characters.  So we
must be prepared for that and generate links to IDNs
(internationalized domain names).
-rw-r--r-- lib/PublicInbox/Linkify.pm 5
-rw-r--r-- t/linkify.t 12
2 files changed, 15 insertions, 2 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index d4778e7d..84960a98 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -13,6 +13,7 @@ package PublicInbox::Linkify;
 use strict;
 use warnings;
 use Digest::SHA qw/sha1_hex/;
+use PublicInbox::Hval qw(ascii_html);
 
 my $SALT = rand;
 my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher)://
@@ -61,12 +62,12 @@ sub linkify_1 {
                         $end = ')';
                 }
 
+                $url = ascii_html($url); # for IDN
+
                 # salt this, as this could be exploited to show
                 # links in the HTML which don't show up in the raw mail.
                 my $key = sha1_hex($url . $SALT);
 
-                # only escape ampersands, others do not match LINK_RE
-                $url =~ s/&/&#38;/g;
                 $_[0]->{$key} = $url;
                 $beg . 'PI-LINK-'. $key . $end;
         ^ge;
diff --git a/t/linkify.t b/t/linkify.t
index fe218b91..c4923582 100644
--- a/t/linkify.t
+++ b/t/linkify.t
@@ -132,4 +132,16 @@ use PublicInbox::Linkify;
                 'punctuation with unpaired ) OK')
 }
 
+if ('IDN example: <ACDB98F4-178C-43C3-99C4-A1D03DD6A8F5@sb.org>') {
+        my $hc = '&#26376;';
+        my $u = "http://www.\x{6708}.example.com/";
+        my $s = $u;
+        my $l = PublicInbox::Linkify->new;
+        $s = $l->linkify_1($s);
+        $s = $l->linkify_2($s);
+        my $expect = qq{<a
+href="http://www.$hc.example.com/">http://www.$hc.example.com/</a>};
+        is($s, $expect, 'IDN message escaped properly');
+}
+
 done_testing();