From 86c018672f6bf9739a76489c8870c151d338fc15 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 10 Dec 2016 01:09:46 +0000 Subject: search: favor In-Reply-To over last References iff IRT exists Some email clients set the References headers backwards, so trust the In-Reply-To header if (and only if) it exists and is parseable as direct parent of the current message. For affected repos, this will require reindexing (via "public-inbox-index --reindex"), but there will be no version bump for this bugfix. --- lib/PublicInbox/SearchIdx.pm | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'lib/PublicInbox/SearchIdx.pm') diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 4aac0281..832d1cbf 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -293,10 +293,10 @@ sub link_message { my $hdr = $mime->header_obj; my $refs = $hdr->header_raw('References'); my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : (); - if (my $irt = $hdr->header_raw('In-Reply-To')) { - # last References should be $irt - # we will de-dupe later - push @refs, mid_clean($irt); + my $irt = $hdr->header_raw('In-Reply-To'); + if (defined $irt) { + $irt = mid_clean($irt); + $irt = undef if $mid eq $irt; } my $tid; @@ -305,6 +305,15 @@ sub link_message { my @orig_refs = @refs; @refs = (); + if (defined $irt) { + # to check MAX_MID_SIZE + push @orig_refs, $irt; + + # below, we will ensure IRT (if specified) + # is the last References + $uniq{$irt} = 1; + } + # prevent circular references via References: here: foreach my $ref (@orig_refs) { if (length($ref) > MAX_MID_SIZE) { @@ -315,6 +324,11 @@ sub link_message { push @refs, $ref; } } + + # last References should be IRT, but some mail clients do things + # out of order, so trust IRT over References iff IRT exists + push @refs, $irt if defined $irt; + if (@refs) { $smsg->{references} = '<'.join('> <', @refs).'>'; -- cgit v1.2.3-24-ge0c7