From 4b551c884a648b45ec6b5465efd9fb67f85f0055 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 5 Dec 2020 11:10:45 +0000 Subject: imap: support isearch and reduce Xapian queries Since IMAP search (either with Isearch or traditional per-Inbox search) only returns UIDs, we can safely set the limit to the UID slice size(*). With isearch, we can also trust the Xapian result to fit any docid range we specify. Limiting Xapian results to 1000 was making ->ALL docid <=> per-Inbox UID mapping impossible since results could overlap between ranges unpredictably. Finally, we can map the ->ALL docids into per-Inbox UIDs and show them to the client in the UID order of the Inbox, not the docid order of the ->ALL extindex. This also lets us get rid of the "uid:" query parser prefix and use the Xapian::Query API directly to reduce our search prefix footprint. For mbox.gz downloads in WWW, we'll also make a best effort to preserve the order from the Inbox, not the order of extindex; though it's possible large result sets can have non-overlapping windows. (*) by definition, UID slice size is a "safe" value which shouldn't OOM either the server or clients. --- lib/PublicInbox/Isearch.pm | 54 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 7 deletions(-) (limited to 'lib/PublicInbox/Isearch.pm') diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm index 0ab3b19a..8a1f257a 100644 --- a/lib/PublicInbox/Isearch.pm +++ b/lib/PublicInbox/Isearch.pm @@ -15,12 +15,6 @@ sub new { bless { es => $es, eidx_key => $ibx->eidx_key }, __PACKAGE__; } -sub mset { - my ($self, $str, $opt) = @_; - $self->{es}->mset($str, { $opt ? %$opt : (), - eidx_key => $self->{eidx_key} }); -} - sub _ibx_id ($) { my ($self) = @_; my $sth = $self->{es}->over->dbh->prepare_cached(<<'', undef, 1); @@ -31,11 +25,57 @@ SELECT ibx_id FROM inboxes WHERE eidx_key = ? 
LIMIT 1 die "E: `$self->{eidx_key}' not in $self->{es}->{topdir}\n"; } + +sub mset { + my ($self, $str, $opt) = @_; + my %opt = $opt ? %$opt : (); + $opt{eidx_key} = $self->{eidx_key}; + if (my $uid_range = $opt{uid_range}) { + my ($beg, $end) = @$uid_range; + my $ibx_id = $self->{-ibx_id} //= _ibx_id($self); + my $dbh = $self->{es}->{over}->dbh; + my $sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ? + + $sth->execute($ibx_id, $beg, $end); + my @r = ($sth->fetchrow_array); + + $sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ? + + $sth->execute($ibx_id, $beg, $end); + $r[1] = $sth->fetchrow_array; + if (defined($r[1]) && defined($r[0])) { + $opt{limit} = $r[1] - $r[0] + 1; + } else { + $r[1] //= 0xffffffff; + $r[0] //= 0; + } + $opt{uid_range} = \@r; + } + $self->{es}->mset($str, \%opt); +} + sub mset_to_artnums { - my ($self, $mset) = @_; + my ($self, $mset, $opt) = @_; my $docids = PublicInbox::Search::mset_to_artnums($self->{es}, $mset); my $ibx_id = $self->{-ibx_id} //= _ibx_id($self); my $qmarks = join(',', map { '?' } @$docids); + if ($opt && ($opt->{mset} // 0) == 2) { # opt->{mset} = 2 was used + my $range = ''; + my @r; + if (my $r = $opt->{uid_range}) { + $range = 'AND xnum >= ? AND xnum <= ?'; + @r = @$r; + } + my $rows = $self->{es}->over->dbh-> + selectall_arrayref(<<"", undef, $ibx_id, @$docids, @r); +SELECT xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks) $range +ORDER BY xnum ASC + + return [ map { $_->[0] } @$rows ]; + } + my $rows = $self->{es}->over->dbh-> selectall_arrayref(<<"", undef, $ibx_id, @$docids); SELECT docid,xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks) -- cgit v1.2.3-24-ge0c7