about summary refs log tree commit homepage
path: root/lib/PublicInbox/Isearch.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-12-05 11:10:45 +0000
committer Eric Wong <e@80x24.org> 2020-12-05 21:41:52 +0000
commit 4b551c884a648b45ec6b5465efd9fb67f85f0055 (patch)
tree 922b3ba8a252589e6335703499c0dc640d3950f5 /lib/PublicInbox/Isearch.pm
parent 525555d14118f92f86be54c683f797089c52a78d (diff)
download public-inbox-4b551c884a648b45ec6b5465efd9fb67f85f0055.tar.gz
Since IMAP search (either with Isearch or traditional per-Inbox
search) only returns UIDs, we can safely set the limit to the
UID slice size(*).  With isearch, we can also trust the Xapian
result to fit any docid range we specify.

Limiting Xapian results to 1000 was making the ->ALL docid <=>
per-Inbox UID mapping impossible since results could overlap
between ranges unpredictably.

Finally, we can map the ->ALL docids into per-Inbox UIDs and
show them to the client in the UID order of the Inbox, not the
docid order of the ->ALL extindex.

This also lets us get rid of the "uid:" query parser prefix
and use the Xapian::Query API directly to reduce our search
prefix footprint.

For mbox.gz downloads in WWW, we'll also make a best effort to
preserve the order from the Inbox, not the order of extindex;
though it's possible large result sets can have non-overlapping
windows.

(*) by definition, UID slice size is a "safe" value which
    shouldn't OOM either the server or clients.
Diffstat (limited to 'lib/PublicInbox/Isearch.pm')
-rw-r--r-- lib/PublicInbox/Isearch.pm 54
1 files changed, 47 insertions, 7 deletions
diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm
index 0ab3b19a..8a1f257a 100644
--- a/lib/PublicInbox/Isearch.pm
+++ b/lib/PublicInbox/Isearch.pm
@@ -15,12 +15,6 @@ sub new {
         bless { es => $es, eidx_key => $ibx->eidx_key }, __PACKAGE__;
 }
 
-sub mset {
-        my ($self, $str, $opt) = @_;
-        $self->{es}->mset($str, { $opt ? %$opt : (),
-                                eidx_key => $self->{eidx_key} });
-}
-
 sub _ibx_id ($) {
         my ($self) = @_;
         my $sth = $self->{es}->over->dbh->prepare_cached(<<'', undef, 1);
@@ -31,11 +25,57 @@ SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
                 die "E: `$self->{eidx_key}' not in $self->{es}->{topdir}\n";
 }
 
+
+sub mset {
+        my ($self, $str, $opt) = @_;
+        my %opt = $opt ? %$opt : ();
+        $opt{eidx_key} = $self->{eidx_key};
+        if (my $uid_range = $opt{uid_range}) {
+                my ($beg, $end) = @$uid_range;
+                my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+                my $dbh = $self->{es}->{over}->dbh;
+                my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
+
+                $sth->execute($ibx_id, $beg, $end);
+                my @r = ($sth->fetchrow_array);
+
+                $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
+
+                $sth->execute($ibx_id, $beg, $end);
+                $r[1] = $sth->fetchrow_array;
+                if (defined($r[1]) && defined($r[0])) {
+                        $opt{limit} = $r[1] - $r[0] + 1;
+                } else {
+                        $r[1] //= 0xffffffff;
+                        $r[0] //= 0;
+                }
+                $opt{uid_range} = \@r;
+        }
+        $self->{es}->mset($str, \%opt);
+}
+
 sub mset_to_artnums {
-        my ($self, $mset) = @_;
+        my ($self, $mset, $opt) = @_;
         my $docids = PublicInbox::Search::mset_to_artnums($self->{es}, $mset);
         my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
         my $qmarks = join(',', map { '?' } @$docids);
+        if ($opt && ($opt->{mset} // 0) == 2) { # opt->{mset} = 2 was used
+                my $range = '';
+                my @r;
+                if (my $r = $opt->{uid_range}) {
+                        $range = 'AND xnum >= ? AND xnum <= ?';
+                        @r = @$r;
+                }
+                my $rows = $self->{es}->over->dbh->
+                        selectall_arrayref(<<"", undef, $ibx_id, @$docids, @r);
+SELECT xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks) $range
+ORDER BY xnum ASC
+
+                return [ map { $_->[0] } @$rows ];
+        }
+
         my $rows = $self->{es}->over->dbh->
                         selectall_arrayref(<<"", undef, $ibx_id, @$docids);
 SELECT docid,xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks)