about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org>, 2023-12-08 03:54:33 +0000
committer: Eric Wong <e@80x24.org>, 2023-12-09 02:03:47 +0000
commit: 305e237b9e22ffed1b009f8f2b12bf99e8856748 (patch)
tree: ed5f2b4e3beb4d0d048088dc99483ac332f3841a /lib/PublicInbox/SearchIdx.pm
parent: c5515401eb4f51998c8444afee9e8f6db1e8d514 (diff)
download: public-inbox-305e237b9e22ffed1b009f8f2b12bf99e8856748.tar.gz
Xapian has always sorted termlist iterators, so we now:

1) break out of the iterator loop early on non-matches
2) avoid doing sorting ourselves

As a result, we'll also favor the wantarray forms of xap_terms
and all_terms to preserve sort order in most cases.

Confirmed by the Xapian maintainer: <20231201184844.GO4059@survex.com>

Link: https://lists.xapian.org/pipermail/xapian-discuss/2023-December/010013.html
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm | 13
1 files changed, 6 insertions, 7 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 1bf471fc..1ac8e33e 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -42,7 +42,7 @@ my $BASE85 = qr/[a-zA-Z0-9\!\#\$\%\&\(\)\*\+\-;<=>\?\@\^_`\{\|\}\~]+/;
 my $xapianlevels = qr/\A(?:full|medium)\z/;
 my $hex = '[a-f0-9]';
 my $OID = $hex .'{40,}';
-my @VMD_MAP = (kw => 'K', L => 'L');
+my @VMD_MAP = (kw => 'K', L => 'L'); # value order matters
 our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/;
 
 sub new {
@@ -608,17 +608,16 @@ sub set_vmd {
         my ($self, $docid, $vmd) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
-        my ($end, @rm, @add);
+        my ($v, @rm, @add);
         my @x = @VMD_MAP;
+        my ($cur, $end) = ($doc->termlist_begin, $doc->termlist_end);
         while (my ($field, $pfx) = splice(@x, 0, 2)) {
                 my $set = $vmd->{$field} // next;
                 my %keep = map { $_ => 1 } @$set;
                 my %add = %keep;
-                $end //= $doc->termlist_end;
-                for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
-                        $cur->skip_to($pfx);
-                        last if $cur == $end;
-                        my $v = $cur->get_termname;
+                $cur->skip_to($pfx); # works due to @VMD_MAP order
+                for (; $cur != $end; $cur++) {
+                        $v = $cur->get_termname;
                         $v =~ s/\A$pfx//s or next;
                         $keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v);
                 }