From 305e237b9e22ffed1b009f8f2b12bf99e8856748 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 8 Dec 2023 03:54:33 +0000
Subject: *search: simplify handling of Xapian term iterators

Xapian has always sorted termlist iterators, so we now:

1) break out of the iterator loop early on non-matches
2) avoid doing sorting ourselves

As a result, we'll also favor the wantarray forms of xap_terms
and all_terms to preserve sort order in most cases.

Confirmed by the Xapian maintainer: <20231201184844.GO4059@survex.com>
Link: https://lists.xapian.org/pipermail/xapian-discuss/2023-December/010013.html
---
 lib/PublicInbox/Search.pm | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'lib/PublicInbox/Search.pm')

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 8ef17d58..678c8c5d 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -614,16 +614,16 @@ sub get_pct ($) { # mset item
 
 sub xap_terms ($$;@) {
 	my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty ()
-	my %ret;
 	my $end = $xdb_or_doc->termlist_end(@docid);
 	my $cur = $xdb_or_doc->termlist_begin(@docid);
+	$cur->skip_to($pfx);
+	my (@ret, $tn);
+	my $pfxlen = length($pfx);
 	for (; $cur != $end; $cur++) {
-		$cur->skip_to($pfx);
-		last if $cur == $end;
-		my $tn = $cur->get_termname;
-		$ret{substr($tn, length($pfx))} = undef if !index($tn, $pfx);
+		$tn = $cur->get_termname;
+		index($tn, $pfx) ? last : push(@ret, substr($tn, $pfxlen));
 	}
-	wantarray ? sort(keys(%ret)) : \%ret;
+	wantarray ? @ret : +{ map { $_ => undef } @ret };
 }
 
 # get combined docid from over.num:
@@ -638,11 +638,12 @@ sub all_terms {
 	my ($self, $pfx) = @_;
 	my $cur = xdb($self)->allterms_begin($pfx);
 	my $end = $self->{xdb}->allterms_end($pfx);
-	my %ret;
+	my $pfxlen = length($pfx);
+	my @ret;
 	for (; $cur != $end; $cur++) {
-		$ret{substr($cur->get_termname, length($pfx))} = undef;
+		push @ret, substr($cur->get_termname, $pfxlen);
 	}
-	wantarray ? (sort keys %ret) : \%ret;
+	wantarray ? @ret : +{ map { $_ => undef } @ret };
 }
 
 sub xh_args { # prep getopt args to feed to xap_helper.h socket
-- 
cgit v1.2.3-24-ge0c7