diff options
author | Eric Wong <e@80x24.org> | 2023-12-08 03:54:33 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2023-12-09 02:03:47 +0000 |
commit | 305e237b9e22ffed1b009f8f2b12bf99e8856748 (patch) | |
tree | ed5f2b4e3beb4d0d048088dc99483ac332f3841a /lib/PublicInbox/SearchIdx.pm | |
parent | c5515401eb4f51998c8444afee9e8f6db1e8d514 (diff) | |
download | public-inbox-305e237b9e22ffed1b009f8f2b12bf99e8856748.tar.gz |
Xapian has always sorted termlist iterators, so we now: 1) break out of the iterator loop early on non-matches; 2) avoid doing sorting ourselves. As a result, we'll also favor the wantarray forms of xap_terms and all_terms to preserve sort order in most cases. Confirmed by the Xapian maintainer: <20231201184844.GO4059@survex.com> Link: https://lists.xapian.org/pipermail/xapian-discuss/2023-December/010013.html
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 1bf471fc..1ac8e33e 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -42,7 +42,7 @@ my $BASE85 = qr/[a-zA-Z0-9\!\#\$\%\&\(\)\*\+\-;<=>\?\@\^_`\{\|\}\~]+/; my $xapianlevels = qr/\A(?:full|medium)\z/; my $hex = '[a-f0-9]'; my $OID = $hex .'{40,}'; -my @VMD_MAP = (kw => 'K', L => 'L'); +my @VMD_MAP = (kw => 'K', L => 'L'); # value order matters our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/; sub new { @@ -608,17 +608,16 @@ sub set_vmd { my ($self, $docid, $vmd) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; - my ($end, @rm, @add); + my ($v, @rm, @add); my @x = @VMD_MAP; + my ($cur, $end) = ($doc->termlist_begin, $doc->termlist_end); while (my ($field, $pfx) = splice(@x, 0, 2)) { my $set = $vmd->{$field} // next; my %keep = map { $_ => 1 } @$set; my %add = %keep; - $end //= $doc->termlist_end; - for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) { - $cur->skip_to($pfx); - last if $cur == $end; - my $v = $cur->get_termname; + $cur->skip_to($pfx); # works due to @VMD_MAP order + for (; $cur != $end; $cur++) { + $v = $cur->get_termname; $v =~ s/\A$pfx//s or next; $keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v); } |