diff options
author | Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> | 2018-03-29 09:57:52 +0000 |
---|---|---|
committer | Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> | 2018-03-29 10:00:04 +0000 |
commit | 11707dae97d1f4638157cfee298464b2f2deeed4 (patch) | |
tree | a25db85d1ab0e5e4daeb099140d31f1c07fd0c5c | |
parent | 821ed7c40b7b50ceb1c942af5e14d168995d514e (diff) | |
download | public-inbox-11707dae97d1f4638157cfee298464b2f2deeed4.tar.gz |
Too many similar functions doing the same basic thing was redundant and misleading, especially since Message-ID is no longer treated as a truly unique identifier. For displaying threads in the HTML, this makes it clear that we favor the primary Message-ID mapped to an NNTP article number if a message cannot be found.
-rw-r--r-- | lib/PublicInbox/Inbox.pm | 22 | ||||
-rw-r--r-- | lib/PublicInbox/Search.pm | 56 | ||||
-rw-r--r-- | lib/PublicInbox/SearchThread.pm | 14 | ||||
-rw-r--r-- | lib/PublicInbox/SearchView.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/View.pm | 12 | ||||
-rw-r--r-- | t/search-thr-index.t | 3 | ||||
-rw-r--r-- | t/search.t | 6 |
7 files changed, 35 insertions, 80 deletions
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 4c7305f9..01aa500c 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -293,20 +293,20 @@ sub path_check { git($self)->check('HEAD:'.$path); } +sub smsg_by_mid ($$) { + my ($self, $mid) = @_; + my $srch = search($self) or return; + # favor the Message-ID we used for the NNTP article number: + my $mm = mm($self) or return; + my $num = $mm->num_for($mid); + $srch->lookup_article($num); +} + sub msg_by_mid ($$;$) { my ($self, $mid, $ref) = @_; my $srch = search($self) or - return msg_by_path($self, mid2path($mid), $ref); - my $smsg; - # favor the Message-ID we used for the NNTP article number: - if (my $mm = mm($self)) { - my $num = $mm->num_for($mid); - $smsg = $srch->lookup_article($num); - } else { - $smsg = $srch->retry_reopen(sub { - $srch->lookup_skeleton($mid) and $smsg->load_expand; - }); - } + return msg_by_path($self, mid2path($mid), $ref); + my $smsg = smsg_by_mid($self, $mid); $smsg ? msg_by_smsg($self, $smsg, $ref) : undef; } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 584a508e..7d42aaad 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -18,7 +18,7 @@ use constant YYYYMMDD => 5; # for searching in the WWW UI use Search::Xapian qw/:standard/; use PublicInbox::SearchMsg; use PublicInbox::MIME; -use PublicInbox::MID qw/mid_clean id_compress/; +use PublicInbox::MID qw/id_compress/; # This is English-only, everything else is non-standard and may be confused as # a prefix common in patch emails @@ -193,9 +193,8 @@ sub query { sub get_thread { my ($self, $mid, $opts) = @_; - my $smsg = retry_reopen($self, sub { lookup_skeleton($self, $mid) }); - - return { total => 0, msgs => [] } unless $smsg; + my $smsg = first_smsg_by_mid($self, $mid) or + return { total => 0, msgs => [] }; my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id); my $path = $smsg->path; if (defined $path && $path ne '') { @@ -346,48 +345,13 @@ sub query_ts { _do_enquire($self, $query, $opts); } -sub lookup_skeleton { +sub first_smsg_by_mid { my ($self, $mid) = @_; - my $skel = $self->{skel} or return lookup_message($self, $mid); - $mid = mid_clean($mid); - my $term = 'Q' . $mid; my $smsg; - my $beg = $skel->postlist_begin($term); - if ($beg != $skel->postlist_end($term)) { - my $doc_id = $beg->get_docid; - if (defined $doc_id) { - # raises on error: - my $doc = $skel->get_document($doc_id); - $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); - $smsg->{doc_id} = $doc_id; - } - } + each_smsg_by_mid($self, $mid, sub { $smsg = $_[0]; undef }); $smsg; } -sub lookup_message { - my ($self, $mid) = @_; - $mid = mid_clean($mid); - - my $doc_id = $self->find_first_doc_id('Q' . $mid); - my $smsg; - if (defined $doc_id) { - # raises on error: - my $doc = $self->{xdb}->get_document($doc_id); - $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); - $smsg->{doc_id} = $doc_id; - } - $smsg; -} - -sub lookup_mail { # no ghosts! - my ($self, $mid) = @_; - retry_reopen($self, sub { - my $smsg = lookup_skeleton($self, $mid) or return; - $smsg->load_expand; - }); -} - sub lookup_article { my ($self, $num) = @_; my $term = 'XNUM'.$num; @@ -447,16 +411,6 @@ sub find_doc_ids { ($db->postlist_begin($termval), $db->postlist_end($termval)); } -sub find_first_doc_id { - my ($self, $termval) = @_; - - my ($begin, $end) = $self->find_doc_ids($termval); - - return undef if $begin->equal($end); # not found - - $begin->get_docid; -} - # normalize subjects so they are suitable as pathnames for URLs # XXX: consider for removal sub subject_path { diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index 6fbce15c..1d250b46 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -22,15 +22,15 @@ use strict; use warnings; sub thread { - my ($messages, $ordersub, $srch) = @_; + my ($messages, $ordersub, $ibx) = @_; my $id_table = {}; _add_message($id_table, $_) foreach @$messages; my $rootset = [ grep { - !delete($_->{parent}) && $_->visible($srch) + !delete($_->{parent}) && $_->visible($ibx) } values %$id_table ]; $id_table = undef; $rootset = $ordersub->($rootset); - $_->order_children($ordersub, $srch) for @$rootset; + $_->order_children($ordersub, $ibx) for @$rootset; $rootset; } @@ -131,20 +131,20 @@ sub has_descendent { # a ghost Message-ID is the result of a long header line # being folded/mangled by a MUA, and not a missing message. sub visible ($$) { - my ($self, $srch) = @_; - ($self->{smsg} ||= eval { $srch->lookup_mail($self->{id}) }) || + my ($self, $ibx) = @_; + ($self->{smsg} ||= eval { $ibx->smsg_by_mid($self->{id}) }) || (scalar values %{$self->{children}}); } sub order_children { - my ($cur, $ordersub, $srch) = @_; + my ($cur, $ordersub, $ibx) = @_; my %seen = ($cur => 1); # self-referential loop prevention my @q = ($cur); while (defined($cur = shift @q)) { my $c = $cur->{children}; # The hashref here... - $c = [ grep { !$seen{$_}++ && visible($_, $srch) } values %$c ]; + $c = [ grep { !$seen{$_}++ && visible($_, $ibx) } values %$c ]; $c = $ordersub->($c) if scalar @$c > 1; $cur->{children} = $c; # ...becomes an arrayref push @q, @$c; diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 1a8fe7f7..c7897958 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -228,7 +228,7 @@ sub mset_thread { my $r = $q->{r}; my $rootset = PublicInbox::SearchThread::thread($msgs, $r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ds, - $srch); + $ctx); my $skel = search_nav_bot($mset, $q). "<pre>"; my $inbox = $ctx->{-inbox}; $ctx->{-upfx} = ''; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index aad860e9..f5b278c2 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -430,7 +430,7 @@ sub thread_html { $ctx->{mapping} = {}; $ctx->{s_nr} = "$nr+ messages in thread"; - my $rootset = thread_results($msgs, $srch); + my $rootset = thread_results($ctx, $msgs); # reduce hash lookups in pre_thread->skel_dump my $inbox = $ctx->{-inbox}; @@ -686,7 +686,7 @@ sub thread_skel { # reduce hash lookups in skel_dump my $ibx = $ctx->{-inbox}; $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; - walk_thread(thread_results($sres, $srch), $ctx, *skel_dump); + walk_thread(thread_results($ctx, $sres), $ctx, *skel_dump); $ctx->{parent_msg} = $parent; } @@ -809,9 +809,9 @@ sub load_results { } sub thread_results { - my ($msgs, $srch) = @_; + my ($ctx, $msgs) = @_; require PublicInbox::SearchThread; - PublicInbox::SearchThread::thread($msgs, *sort_ds, $srch); + PublicInbox::SearchThread::thread($msgs, *sort_ds, $ctx->{-inbox}); } sub missing_thread { @@ -952,7 +952,7 @@ sub acc_topic { my ($ctx, $level, $node) = @_; my $srch = $ctx->{srch}; my $mid = $node->{id}; - my $x = $node->{smsg} || $srch->lookup_mail($mid); + my $x = $node->{smsg} || $ctx->{-inbox}->smsg_by_mid($mid); my ($subj, $ds); my $topic; if ($x) { @@ -1078,7 +1078,7 @@ sub index_topics { my $nr = scalar @{$sres->{msgs}}; if ($nr) { $sres = load_results($srch, $sres); - walk_thread(thread_results($sres, $srch), $ctx, *acc_topic); + walk_thread(thread_results($ctx, $sres), $ctx, *acc_topic); } $ctx->{-next_o} = $off+ $nr; $ctx->{-cur_o} = $off; diff --git a/t/search-thr-index.t b/t/search-thr-index.t index 6c6e4c57..9549976d 100644 --- a/t/search-thr-index.t +++ b/t/search-thr-index.t @@ -4,6 +4,7 @@ use strict; use warnings; use Test::More; use File::Temp qw/tempdir/; +use PublicInbox::MID qw(mids); use Email::MIME; eval { require PublicInbox::SearchIdx; }; plan skip_all => "Xapian missing for search" if $@; @@ -41,7 +42,7 @@ foreach (reverse split(/\n\n/, $data)) { $mime->header_set('From' => 'bw@g'); $mime->header_set('To' => 'git@vger.kernel.org'); my $bytes = bytes::length($mime->as_string); - my $mid = $mime->header('Message-Id'); + my $mid = mids($mime->header_obj)->[0]; my $doc_id = $rw->add_message($mime, $bytes, ++$num, 'ignored', $mid); push @mids, $mid; ok($doc_id, 'message added: '. $mid); @@ -89,7 +89,7 @@ sub filter_mids { { $rw_commit->(); $ro->reopen; - my $found = $ro->lookup_message('<root@s>'); + my $found = $ro->first_smsg_by_mid('root@s'); ok($found, "message found"); is($root_id, $found->{doc_id}, 'doc_id set correctly'); is($found->mid, 'root@s', 'mid set correctly'); @@ -264,7 +264,7 @@ sub filter_mids { ], body => "LOOP!\n")); ok($doc_id > 0, "doc_id defined with circular reference"); - my $smsg = $rw->lookup_message('circle@a'); + my $smsg = $rw->first_smsg_by_mid('circle@a'); is($smsg->references, '', "no references created"); my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); is($s, $msg->subject, 'long subject not rewritten'); @@ -281,7 +281,7 @@ sub filter_mids { my $mime = Email::MIME->new($str); my $doc_id = $rw->add_message($mime); ok($doc_id > 0, 'message indexed doc_id with UTF-8'); - my $smsg = $rw->lookup_message('testmessage@example.com'); + my $smsg = $rw->first_smsg_by_mid('testmessage@example.com'); my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved'); |