From 4030525cb228eb3837f5260637bd7a5a861e81e2 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sat, 3 Mar 2018 05:14:33 +0000 Subject: mid: be strict with References, but loose on Message-Id Traditionally we've been more lax on parsing Message-Id and allow it without the angle brackets. We've always been strict on References and can't have it be pointlessly large when some MUA decides to use HTML-escaped angle brackets ("&lt;", "&gt;"). --- lib/PublicInbox/MID.pm | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) (limited to 'lib/PublicInbox/MID.pm') diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index 4ccb704d..96085399 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -49,16 +49,39 @@ sub mid2path { sub mid_mime ($) { $_[0]->header_obj->header_raw('Message-ID') } -sub uniq_mids { - my ($hdr, @fields) = @_; - my %seen; - my @raw; - foreach my $f (@fields) { - push @raw, $hdr->header_raw($f); +sub mids ($) { + my ($hdr) = @_; + my @mids; + my @v = $hdr->header_raw('Message-Id'); + foreach my $v (@v) { + my @cur = ($v =~ /<([^>]+)>/sg); + if (@cur) { + push(@mids, @cur); + } else { + push(@mids, $v); + } } - my @mids = (join(' ', @raw) =~ /<([^>]+)>/g); - my $mids = scalar(@mids) == 0 ? 
\@raw: \@mids; + uniq_mids(\@mids); +} + +# last References should be IRT, but some mail clients do things +# out of order, so trust IRT over References iff IRT exists +sub references ($) { + my ($hdr) = @_; + my @mids; + foreach my $f (qw(References In-Reply-To)) { + my @v = $hdr->header_raw($f); + foreach my $v (@v) { + push(@mids, ($v =~ /<([^>]+)>/sg)); + } + } + uniq_mids(\@mids); +} + +sub uniq_mids ($) { + my ($mids) = @_; my @ret; + my %seen; foreach (@$mids) { next if $seen{$_}; push @ret, $_; @@ -67,12 +90,6 @@ sub uniq_mids { \@ret; } -sub mids { uniq_mids($_[0], 'Message-Id') } - -# last References should be IRT, but some mail clients do things -# out of order, so trust IRT over References iff IRT exists -sub references { uniq_mids($_[0], 'References', 'In-Reply-To') } - # RFC3986, section 3.3: sub MID_ESC () { '^A-Za-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@' } sub mid_escape ($) { uri_escape_utf8($_[0], MID_ESC) } -- cgit v1.2.3-24-ge0c7