diff options
author | Eric Wong <e@80x24.org> | 2020-12-05 10:11:38 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2020-12-05 21:41:52 +0000 |
commit | 525555d14118f92f86be54c683f797089c52a78d (patch) | |
tree | 59baf4193882b020ffe67da2e6e96156e86e8b41 /lib/PublicInbox/Isearch.pm | |
parent | 89990b0cfc8bbdabc8d650325d9e1bc585df8a0c (diff) | |
download | public-inbox-525555d14118f92f86be54c683f797089c52a78d.tar.gz |
isearch: emulate per-inbox search with ->ALL
Using "eidx_key:" boolean prefix to limit results to a given inbox, we can use ->ALL to emulate and replace per-Inbox xap15/[0-9] search indices. With this change, the presence of "extindex.all.topdir" in the $PI_CONFIG will cause the WWW code to use that extindex and ignore per-inbox Xapian DBs in xap15/[0-9]. Unfortunately, IMAP search still requires old per-inbox indices, for now. Mapping extindex Xapian docids to per-Inbox UIDs and vice versa is proving tricky. Fortunately, IMAP search is rarely used and optional. The RFCs don't specify expensive phrase search, either, so `indexlevel=medium' can be used in per-inbox Xapian indices to save space. For primarily WWW (and future JMAP) users, this should result in significant disk space, FD, and page cache footprint savings for large instances with many inboxes and many cross-posted messages.
Diffstat (limited to 'lib/PublicInbox/Isearch.pm')
-rw-r--r-- | lib/PublicInbox/Isearch.pm | 87 |
1 files changed, 87 insertions, 0 deletions
# Copyright (C) 2020 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# Provides everything the PublicInbox::Search object does;
# but uses global ExtSearch (->ALL) with an eidx_key query to
# emulate per-Inbox search using ->ALL.
package PublicInbox::Isearch;
use strict;
use v5.10.1;
use PublicInbox::ExtSearch;
use PublicInbox::Search;

# Constructor.  $ibx supplies the eidx_key identifying the inbox and
# $es is the ExtSearch object every query is delegated to.
# The invocant is ignored; objects are always blessed into this package.
sub new {
	my (undef, $ibx, $es) = @_;
	bless { es => $es, eidx_key => $ibx->eidx_key }, __PACKAGE__;
}

# Runs the query string $str via $es->mset.  The optional $opt hashref
# is shallow-copied and eidx_key is set to this inbox's key (overriding
# any eidx_key the caller supplied), so the caller's hash is not modified.
# Returns whatever $es->mset returns.
sub mset {
	my ($self, $str, $opt) = @_;
	$self->{es}->mset($str, { $opt ? %$opt : (),
				eidx_key => $self->{eidx_key} });
}

# Looks up the ibx_id for our eidx_key in the `inboxes' table of the
# extindex over DB.  Dies if no row exists for the eidx_key.
sub _ibx_id ($) {
	my ($self) = @_;
	my $sth = $self->{es}->over->dbh->prepare_cached(<<'', undef, 1);
SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1

	$sth->execute($self->{eidx_key});
	$sth->fetchrow_array //
		die "E: `$self->{eidx_key}' not in $self->{es}->{topdir}\n";
}

# Maps the extindex docids of $mset to the xnum values recorded for
# this inbox in the `xref3' table, preserving the order of the mset.
# Docids without an xref3 row for this inbox are warned about and
# omitted from the result.  Returns an arrayref of xnums.
sub mset_to_artnums {
	my ($self, $mset) = @_;
	my $docids = PublicInbox::Search::mset_to_artnums($self->{es}, $mset);

	# resolved once per object and cached; dies if the inbox is unknown
	my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);

	# nothing to map: avoid issuing a query with an empty IN () list
	return [] unless @$docids;

	my $qmarks = join(',', map { '?' } @$docids);
	my $rows = $self->{es}->over->dbh->
			selectall_arrayref(<<"", undef, $ibx_id, @$docids);
SELECT docid,xnum FROM xref3 WHERE ibx_id = ?
AND docid IN ($qmarks)

	# docid => position in the mset, so SQL row order does not matter
	my $i = -1;
	my %order = map { $_ => ++$i } @$docids;
	my @xnums;
	for my $row (@$rows) { # @row = ($docid, $xnum)
		my $idx = delete($order{$row->[0]}) // next;
		$xnums[$idx] = $row->[1];
	}
	if (scalar keys %order) { # leftover docids had no xref3 row
		warn "W: $self->{es}->{topdir} #",
			# docids are integers, sort them numerically
			join(', #', sort { $a <=> $b } keys %order),
			" not mapped to `$self->{eidx_key}'\n";
		warn "W: $self->{es}->{topdir} may need to be reindexed\n";
		@xnums = grep { defined } @xnums; # drop the holes
	}
	\@xnums;
}

# Converts $mset into smsg objects fetched from the over DB of $ibx
# via $ibx->over->get_all, preserving the order of the mset.
# Article numbers which get_all no longer returns are warned about and
# omitted from the result.
# Returns ($mset->get_matches_estimated, \@msgs) in list context
# and \@msgs in scalar context.
sub mset_to_smsg {
	my ($self, $ibx, $mset) = @_; # $ibx is a real inbox, not eidx
	my $xnums = mset_to_artnums($self, $mset);

	# xnum => position in the mset
	my $i = -1;
	my %order = map { $_ => ++$i } @$xnums;
	my $unordered = $ibx->over->get_all(@$xnums);
	my @msgs;
	for my $smsg (@$unordered) {
		# an smsg we never asked for (or saw already) is skipped
		my $idx = delete($order{$smsg->{num}}) // do {
			warn "W: $ibx->{inboxdir} #$smsg->{num}\n";
			next;
		};
		$msgs[$idx] = $smsg;
	}
	if (scalar keys %order) { # leftover xnums were not returned
		warn "W: $ibx->{inboxdir} #",
			# article numbers are integers, sort them numerically
			join(', #', sort { $a <=> $b } keys %order),
			" no longer valid\n";
		warn "W: $self->{es}->{topdir} may need to be reindexed\n";
		# drop the holes so callers never see undef entries,
		# matching what mset_to_artnums does for unmapped docids
		@msgs = grep { defined } @msgs;
	}
	wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs;
}

# Always true for this class.
sub has_threadid { 1 }

# Delegates to the help method of the underlying ExtSearch object.
sub help { $_[0]->{es}->help }

1;