From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS200651 185.100.86.0/24 X-Spam-Status: No, score=-1.7 required=3.0 tests=AWL,BAYES_00, RCVD_IN_MSPIKE_BL,RCVD_IN_MSPIKE_ZBI,RCVD_IN_XBL,RDNS_NONE,SPF_FAIL, SPF_HELO_FAIL,TO_EQ_FM_DOM_SPF_FAIL shortcircuit=no autolearn=no autolearn_force=no version=3.4.0 Received: from 80x24.org (unknown [185.100.86.86]) by dcvr.yhbt.net (Postfix) with ESMTP id 6A4F51FD99 for ; Thu, 18 Aug 2016 02:02:54 +0000 (UTC) From: Eric Wong To: spew@80x24.org Subject: [PATCH] linkify: be stricter about matching RFC 3986 Date: Thu, 18 Aug 2016 02:02:50 +0000 Message-Id: <20160818020250.15547-1-e@80x24.org> List-Id: We're not to-the-letter about percent-encoding, but we should allow all the characters that RFC 3986 permits in URLs. This is mainly so we can effectively link to some Wikipedia pages with parentheses in them: https://en.wikipedia.org/wiki/Atom_(standard) https://en.wikipedia.org/wiki/Git_(software) --- lib/PublicInbox/Linkify.pm | 5 ++++- t/linkify.t | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index d4df689..ea7fd71 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -17,7 +17,10 @@ use Digest::SHA qw/sha1_hex/; my $SALT = rand; my $LINK_RE = qr{\b((?:ftps?|https?|nntps?|gopher):// [\@:\w\.-]+/ - ?[!,:~\$\@\w\+\&\?\.\%\;/#=-]*)}x; + (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*) + (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)? + (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)? 
+ )}xi; sub new { bless {}, shift } diff --git a/t/linkify.t b/t/linkify.t index 586691a..ec0d515 100644 --- a/t/linkify.t +++ b/t/linkify.t @@ -23,4 +23,38 @@ use PublicInbox::Linkify; is($s, qq($u;), 'trailing semicolon not in URL'); } +{ + my $l = PublicInbox::Linkify->new; + my $u = 'http://example.com/url-with-(parens)'; + my $s = "hello $u world"; + $s = $l->linkify_1($s); + $s = $l->linkify_2($s); + is($s, qq(hello $u world), 'URL preserved'); + + $u .= "?query=a"; + $s = "hello $u world"; + $s = $l->linkify_1($s); + $s = $l->linkify_2($s); + is($s, qq(hello $u world), 'query preserved'); + + $u .= "#fragment"; + $s = "hello $u world"; + $s = $l->linkify_1($s); + $s = $l->linkify_2($s); + is($s, qq(hello $u world), + 'query + fragment preserved'); + + $u = "http://example.com/"; + $s = "hello $u world"; + $s = $l->linkify_1($s); + $s = $l->linkify_2($s); + is($s, qq(hello $u world), "root URL preserved"); + + $u = "http://example.com/#fragment"; + $s = "hello $u world"; + $s = $l->linkify_1($s); + $s = $l->linkify_2($s); + is($s, qq(hello $u world), "root + fragment"); +} + done_testing(); -- EW