* [PATCH] www: start wiring up search to use async xap_helper
@ 2024-04-19 11:56 Eric Wong
0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2024-04-19 11:56 UTC (permalink / raw)
To: spew
The C++ version of xap_helper will allow more complex and
expensive queries. Both the Perl and C++-only versions will
allow offloading search into a separate process which can be
killed via ITIMER_REAL or RLIMIT_CPU in the face of overload.
xap_helper is simplified to drop terms+data from the `mset' command.
Retrieving Xapian document terms, data (and possibly values) and
transferring them to the Perl side would be an increase in complexity
on both the Perl and C++ sides. It would require more I/O and
transient memory use on the Perl side, so let's ignore the
document-related stuff here for now for ease-of-development.
---
MANIFEST | 2 ++
lib/PublicInbox/Isearch.pm | 50 +++++++++++++++-----------
lib/PublicInbox/Search.pm | 23 +++++++++++-
lib/PublicInbox/SearchView.pm | 58 +++++++++++++++++-------------
lib/PublicInbox/XapClient.pm | 1 +
lib/PublicInbox/XapHelper.pm | 9 ++---
lib/PublicInbox/XhcMset.pm | 50 ++++++++++++++++++++++++++
lib/PublicInbox/XhcMsetIterator.pm | 20 +++++++++++
lib/PublicInbox/xap_helper.h | 4 +--
lib/PublicInbox/xh_mset.h | 31 ++++------------
t/cindex.t | 21 +++++------
t/xap_helper.t | 26 +++++---------
12 files changed, 188 insertions(+), 107 deletions(-)
create mode 100644 lib/PublicInbox/XhcMset.pm
create mode 100644 lib/PublicInbox/XhcMsetIterator.pm
diff --git a/MANIFEST b/MANIFEST
index 4c974338..fb175e5f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -382,6 +382,8 @@ lib/PublicInbox/XapClient.pm
lib/PublicInbox/XapHelper.pm
lib/PublicInbox/XapHelperCxx.pm
lib/PublicInbox/Xapcmd.pm
+lib/PublicInbox/XhcMset.pm
+lib/PublicInbox/XhcMsetIterator.pm
lib/PublicInbox/gcf2_libgit2.h
lib/PublicInbox/xap_helper.h
lib/PublicInbox/xh_cidx.h
diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm
index 62112171..20808d6d 100644
--- a/lib/PublicInbox/Isearch.pm
+++ b/lib/PublicInbox/Isearch.pm
@@ -26,34 +26,44 @@ SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
sub query_approxidate { $_[0]->{es}->query_approxidate($_[1], $_[2]) }
-sub mset {
- my ($self, $str, $opt) = @_;
+sub eidx_mset_prep ($$) {
+ my ($self, $opt) = @_;
my %opt = $opt ? %$opt : ();
$opt{eidx_key} = $self->{eidx_key};
- if (my $uid_range = $opt{uid_range}) {
- my ($beg, $end) = @$uid_range;
- my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
- my $dbh = $self->{es}->over->dbh;
- my $sth = $dbh->prepare_cached(<<'', undef, 1);
+ my $uid_range = $opt{uid_range} or return \%opt;
+ my ($beg, $end) = @$uid_range;
+ my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+ my $dbh = $self->{es}->over->dbh;
+ my $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
- $sth->execute($ibx_id, $beg, $end);
- my @r = ($sth->fetchrow_array);
+ $sth->execute($ibx_id, $beg, $end);
+ my @r = ($sth->fetchrow_array);
- $sth = $dbh->prepare_cached(<<'', undef, 1);
+ $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
- $sth->execute($ibx_id, $beg, $end);
- $r[1] = $sth->fetchrow_array;
- if (defined($r[1]) && defined($r[0])) {
- $opt{limit} = $r[1] - $r[0] + 1;
- } else {
- $r[1] //= $self->{es}->xdb->get_lastdocid;
- $r[0] //= 0;
- }
- $opt{uid_range} = \@r; # these are fed to Xapian and SQLite
+ $sth->execute($ibx_id, $beg, $end);
+ $r[1] = $sth->fetchrow_array;
+ if (defined($r[1]) && defined($r[0])) {
+ $opt{limit} = $r[1] - $r[0] + 1;
+ } else {
+ $r[1] //= $self->{es}->xdb->get_lastdocid;
+ $r[0] //= 0;
}
- $self->{es}->mset($str, \%opt);
+ $opt{uid_range} = \@r; # these are fed to Xapian and SQLite
+ \%opt;
+}
+
+sub mset {
+ my ($self, $str, $opt) = @_;
+ $self->{es}->mset($str, eidx_mset_prep $self, $opt);
+}
+
+sub async_mset {
+ my ($self, $str, $opt, $cb, @args) = @_;
+ $opt = eidx_mset_prep $self, $opt;
+ $self->{es}->async_mset($str, $opt, $cb, @args);
}
sub mset_to_artnums {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 678c8c5d..e83bb4c2 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -11,6 +11,7 @@ our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
use List::Util qw(max);
use POSIX qw(strftime);
use Carp ();
+our $XHC;
# values for searching, changing the numeric value breaks
# compatibility with old indices (so don't change them it)
@@ -89,9 +90,9 @@ our @XH_SPEC = (
'r', # 1=relevance then column
't', # collapse threads
'A=s@', # prefixes
- 'D', # emit docdata
'K=i', # timeout kill after i seconds
'O=s', # eidx_key
+ 'R', # emit rank
'T=i', # threadid
);
@@ -429,6 +430,26 @@ sub mset {
do_enquire($self, $qry, $opt, TS);
}
+sub xhc_start_maybe () {
+ require PublicInbox::XapClient;
+ my $xhc = PublicInbox::XapClient::start_helper();
+ require PublicInbox::XhcMset if $xhc;
+ $xhc;
+}
+
+sub async_mset {
+ my ($self, $qry_str, $opt, $cb, @args) = @_;
+ $XHC //= xhc_start_maybe;
+ if ($XHC) { # unconditionally retrieving pct + rank for now
+ my $rd = $XHC->mkreq(undef, 'mset', xh_args($self), qw(-p -R),
+ $qry_str);
+ PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args);
+ } else {
+ my $mset = $self->mset($qry_str, $opt);
+ $cb->(@args, $mset);
+ }
+}
+
sub do_enquire { # shared with CodeSearch
my ($self, $qry, $opt, $col) = @_;
my $enq = $X{Enquire}->new(xdb($self));
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 2d3e942c..9919e25c 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -30,10 +30,9 @@ sub mbox_results {
sub sres_top_html {
my ($ctx) = @_;
- my $srch = $ctx->{ibx}->isrch or
+ my $srch = $ctx->{srch} = $ctx->{ibx}->isrch or
return PublicInbox::WWW::need($ctx, 'Search');
my $q = PublicInbox::SearchQuery->new($ctx->{qp});
- my $x = $q->{x};
my $o = $q->{o} // 0;
my $asc;
if ($o < 0) {
@@ -41,48 +40,57 @@ sub sres_top_html {
$o = -($o + 1); # so [-1] is the last element, like Perl lists
}
- my $code = 200;
# double the limit for expanded views:
- my $opts = {
+ my $opt = {
limit => $q->{l},
offset => $o,
relevance => $q->{r},
threads => $q->{t},
asc => $asc,
};
- my ($mset, $total, $err, $html);
-retry:
- eval {
- my $query = $q->{'q'};
- $srch->query_approxidate($ctx->{ibx}->git, $query);
- $mset = $srch->mset($query, $opts);
- $total = $mset->get_matches_estimated;
- };
- $err = $@;
+ my $qs = $q->{'q'};
+ $srch->query_approxidate($ctx->{ibx}->git, $qs);
+ sub {
+ $ctx->{wcb} = $_[0]; # PSGI server supplied write cb
+ $srch->async_mset($qs, $opt, \&sres_html_cb, $ctx, $opt, $q);
+ }
+}
+
+sub sres_html_cb { # async_mset cb
+ my ($ctx, $opt, $q, $mset, $err) = @_;
+ my $code = 200;
+ my $total = $mset ? $mset->get_matches_estimated : undef;
ctx_prepare($q, $ctx);
+ my ($res, $html);
if ($err) {
$code = 400;
$html = '<pre>'.err_txt($ctx, $err).'</pre><hr>';
} elsif ($total == 0) {
- if (defined($ctx->{-uxs_retried})) {
- # undo retry damage:
+ if (defined($ctx->{-uxs_retried})) { # undo retry damage:
$q->{'q'} = $ctx->{-uxs_retried};
- } elsif (index($q->{'q'}, '%') >= 0) {
+ } elsif (index($q->{'q'}, '%') >= 0) { # retry unescaped
$ctx->{-uxs_retried} = $q->{'q'};
- $q->{'q'} = uri_unescape($q->{'q'});
- goto retry;
+ my $qs = $q->{'q'} = uri_unescape($q->{'q'});
+ $ctx->{srch}->query_approxidate($ctx->{ibx}->git, $qs);
+ return $ctx->{srch}->async_mset($qs, $opt,
+ \&sres_html_cb, $ctx, $opt, $q);
}
$code = 404;
$html = "<pre>\n[No results found]</pre><hr>";
+ } elsif ($q->{x} eq 'A') {
+ $res = adump($mset, $q, $ctx);
} else {
- return adump($_[0], $mset, $q, $ctx) if $x eq 'A';
-
$ctx->{-html_tip} = search_nav_top($mset, $q, $ctx);
- return mset_thread($ctx, $mset, $q) if $x eq 't';
- mset_summary($ctx, $mset, $q); # appends to {-html_tip}
- $html = '';
+ if ($q->{x} eq 't') {
+ $res = mset_thread($ctx, $mset, $q);
+ } else {
+ mset_summary($ctx, $mset, $q); # appends to {-html_tip}
+ $html = '';
+ }
}
- html_oneshot($ctx, $code, $html);
+ $res //= html_oneshot($ctx, $code, $html);
+ my $wcb = delete $ctx->{wcb};
+ ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res);
}
# display non-nested search results similar to what users expect from
@@ -357,7 +365,7 @@ sub ctx_prepare {
}
sub adump {
- my ($cb, $mset, $q, $ctx) = @_;
+ my ($mset, $q, $ctx) = @_;
$ctx->{ids} = $ctx->{ibx}->isrch->mset_to_artnums($mset);
$ctx->{search_query} = $q; # used by WwwAtomStream::atom_header
PublicInbox::WwwAtomStream->response($ctx, \&adump_i);
diff --git a/lib/PublicInbox/XapClient.pm b/lib/PublicInbox/XapClient.pm
index 98034130..f0270091 100644
--- a/lib/PublicInbox/XapClient.pm
+++ b/lib/PublicInbox/XapClient.pm
@@ -26,6 +26,7 @@ sub mkreq {
}
sub start_helper {
+ $PublicInbox::IPC::send_cmd or return; # can't work w/o SCM_RIGHTS
my @argv = @_;
socketpair(my $sock, my $in, AF_UNIX, SOCK_SEQPACKET, 0);
my $cls = 'PublicInbox::XapHelperCxx';
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index 8c7732f5..e3009720 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -150,11 +150,7 @@ sub mset_iter ($$) {
eval {
my $buf = $it->get_docid;
$buf .= "\0".$it->get_percent if $req->{p};
- my $doc = ($req->{A} || $req->{D}) ? $it->get_document : undef;
- for my $p (@{$req->{A}}) {
- $buf .= "\0".$p.$_ for xap_terms($p, $doc);
- }
- $buf .= "\0".$doc->get_data if $req->{D};
+ $buf .= "\0".$it->get_rank if $req->{R};
say { $req->{0} } $buf;
};
$@ ? iter_retry_check($req) : 0;
@@ -170,7 +166,8 @@ sub cmd_mset { # to be used by WWW + IMAP
$opt->{eidx_key} = $req->{O} if defined $req->{O};
$opt->{threadid} = $req->{T} if defined $req->{T};
my $mset = $req->{srch}->mset($qry_str, $opt);
- say { $req->{0} } 'mset.size=', $mset->size;
+ say { $req->{0} } 'mset.size=', $mset->size,
+ ' .get_matches_estimated=', $mset->get_matches_estimated;
for my $it ($mset->items) {
for (my $t = 10; $t > 0; --$t) {
$t = mset_iter($req, $it) // $t;
diff --git a/lib/PublicInbox/XhcMset.pm b/lib/PublicInbox/XhcMset.pm
new file mode 100644
index 00000000..b2396bde
--- /dev/null
+++ b/lib/PublicInbox/XhcMset.pm
@@ -0,0 +1,50 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# mocks Xapian::Mset
+package PublicInbox::XhcMset;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::XhcMsetIterator;
+use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
+
+sub event_step {
+ my ($self) = @_;
+ my $rd = $self->{sock};
+ my ($cb, @args) = @{delete $self->{cb_args} // return};
+ eval {
+ my $xdb = $self->{srch}->xdb;
+ my $hdr = <$rd> // die "E: reading mset header: $!";
+ for (split /\s+/, $hdr) { # read mset.size + estimated_matches
+ my ($k, $v) = split /=/, $_, 2;
+ $k =~ s/\A[^\.]*\.//; # s/(mset)?\./
+ $self->{$k} = $v;
+ }
+ $self->{size} // die "E: bad xhc header: `$hdr'";
+ my @it = map {
+ PublicInbox::XhcMsetIterator::make($_, $xdb)
+ } <$rd>;
+ $self->{items} = \@it;
+ };
+ my $err = $@;
+ $self->close;
+ eval { $cb->(@args, $self, $err) };
+ warn "E: $@\n" if $@;
+}
+
+sub maybe_new {
+ my (undef, $rd, $srch, @cb_args) = @_;
+ my $self = bless { cb_args => \@cb_args, srch => $srch }, __PACKAGE__;
+ if ($PublicInbox::DS::in_loop) {
+ $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT);
+ } else {
+ local $self->{sock} = $rd;
+ event_step($self);
+ }
+}
+
+eval(join('', map { "sub $_ { \$_[0]->{$_} }\n" } qw(size
+ get_matches_estimated)));
+
+sub items { @{$_[0]->{items}} }
+
+1;
diff --git a/lib/PublicInbox/XhcMsetIterator.pm b/lib/PublicInbox/XhcMsetIterator.pm
new file mode 100644
index 00000000..dcfc61e4
--- /dev/null
+++ b/lib/PublicInbox/XhcMsetIterator.pm
@@ -0,0 +1,20 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# mocks Xapian::MsetIterator, there's many of these allocated at once
+package PublicInbox::XhcMsetIterator;
+use v5.12;
+
+sub make ($) {
+ chomp($_[0]);
+ my @self = map { $_ + 0 } split /\0/, $_[0]; # docid, pct, rank
+ # we don't store $xdb in self[4] since we avoid $it->get_document
+ # in favor of $xdb->get_document($it->get_docid)
+ bless \@self, __PACKAGE__;
+}
+
+sub get_docid { $_[0]->[0] }
+sub get_percent { $_[0]->[1] }
+sub get_rank { $_[0]->[2] }
+
+1;
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 3456910b..6fb99cf2 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -142,7 +142,7 @@ struct req { // argv and pfxv point into global rbuf
bool code_search;
bool relevance; // sort by relevance before column
bool emit_percent;
- bool emit_docdata;
+ bool emit_rank;
bool asc; // ascending sort
};
@@ -641,7 +641,6 @@ static void dispatch(struct req *req)
if (MY_ARG_MAX == req->pfxc)
ABORT("too many -A");
break;
- case 'D': req->emit_docdata = true; break;
case 'K':
req->timeout_sec = strtoul(optarg, &end, 10);
if (*end || req->timeout_sec == ULONG_MAX)
@@ -653,6 +652,7 @@ static void dispatch(struct req *req)
if (*end || req->threadid == ULLONG_MAX)
ABORT("-T %s", optarg);
break;
+ case 'R': req->emit_rank = true; break;
default: ABORT("bad switch `-%c'", c);
}
}
diff --git a/lib/PublicInbox/xh_mset.h b/lib/PublicInbox/xh_mset.h
index 4e97a284..33dfef54 100644
--- a/lib/PublicInbox/xh_mset.h
+++ b/lib/PublicInbox/xh_mset.h
@@ -3,20 +3,6 @@
// This file is only intended to be included by xap_helper.h
// it implements pieces used by WWW, IMAP and lei
-static void emit_doc_term(FILE *fp, const char *pfx, Xapian::Document *doc)
-{
- Xapian::TermIterator cur = doc->termlist_begin();
- Xapian::TermIterator end = doc->termlist_end();
- size_t pfx_len = strlen(pfx);
-
- for (cur.skip_to(pfx); cur != end; cur++) {
- std::string tn = *cur;
- if (!starts_with(&tn, pfx, pfx_len)) break;
- fputc(0, fp);
- fwrite(tn.data(), tn.size(), 1, fp);
- }
-}
-
static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off,
Xapian::MSetIterator *i)
{
@@ -24,16 +10,9 @@ static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off,
fprintf(fp, "%llu", (unsigned long long)(*(*i))); // get_docid
if (req->emit_percent)
fprintf(fp, "%c%d", 0, i->get_percent());
- if (req->pfxc || req->emit_docdata) {
- Xapian::Document doc = i->get_document();
- for (int p = 0; p < req->pfxc; p++)
- emit_doc_term(fp, req->pfxv[p], &doc);
- if (req->emit_docdata) {
- std::string d = doc.get_data();
- fputc(0, fp);
- fwrite(d.data(), d.size(), 1, fp);
- }
- }
+ if (req->emit_rank)
+ fprintf(fp, "%c%llu", 0,
+ (unsigned long long)i->get_rank());
fputc('\n', fp);
} catch (const Xapian::DatabaseModifiedError & e) {
req->srch->db->reopen();
@@ -63,7 +42,9 @@ static bool cmd_mset(struct req *req)
Xapian::MSet mset = req->code_search ? commit_mset(req, qry_str) :
mail_mset(req, qry_str);
fbuf_init(&wbuf);
- fprintf(wbuf.fp, "mset.size=%llu\n", (unsigned long long)mset.size());
+ fprintf(wbuf.fp, "mset.size=%llu .get_matches_estimated=%llu\n",
+ (unsigned long long)mset.size(),
+ (unsigned long long)mset.get_matches_estimated());
int fd = fileno(req->fp[0]);
for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) {
off_t off = ftello(wbuf.fp);
diff --git a/t/cindex.t b/t/cindex.t
index e5f26ec3..aabddca8 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -147,26 +147,26 @@ if ('multi-repo search') {
my $test_xhc = sub {
my ($xhc) = @_;
+ my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
my $impl = $xhc->{impl};
my ($r, @l);
- $r = $xhc->mkreq([], qw(mset -D -c -g), $zp_git, @xh_args, 'NUL');
+ $r = $xhc->mkreq([], qw(mset -c -g), $zp_git, @xh_args, 'NUL');
chomp(@l = <$r>);
- is(shift(@l), 'mset.size=2', "got expected header $impl");
+ like shift(@l), qr/\bmset\.size=2\b/, "got expected header $impl";
my %docid2data;
my @got = sort map {
- my @f = split /\0/;
- is scalar(@f), 2, 'got 2 entries';
- $docid2data{$f[0]} = $f[1];
- $f[1];
+ my ($docid, @extra) = split /\0/;
+ is scalar(@extra), 0, 'no extra fields';
+ $docid2data{$docid} =
+ $csrch->xdb->get_document($docid)->get_data;
} @l;
is_deeply(\@got, $exp, "expected doc_data $impl");
$r = $xhc->mkreq([], qw(mset -c -g), "$tmp/wt0/.git", @xh_args, 'NUL');
chomp(@l = <$r>);
- is(shift(@l), 'mset.size=0', "got miss in wrong dir $impl");
+ like shift(@l), qr/\bmset.size=0\b/, "got miss in wrong dir $impl";
is_deeply(\@l, [], "no extra lines $impl");
- my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
while (my ($did, $expect) = each %docid2data) {
is_deeply($csrch->xdb->get_document($did)->get_data,
$expect, "docid=$did data matches");
@@ -179,14 +179,15 @@ SKIP: {
require_mods('+SCM_RIGHTS', 1);
require PublicInbox::XapClient;
my $xhc = PublicInbox::XapClient::start_helper('-j0');
- $test_xhc->($xhc);
+ my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
+ $test_xhc->($xhc, $csrch);
skip 'PI_NO_CXX set', 1 if $ENV{PI_NO_CXX};
$xhc->{impl} =~ /Cxx/ or
skip 'C++ compiler or xapian development libs missing', 1;
skip 'TEST_XH_CXX_ONLY set', 1 if $ENV{TEST_XH_CXX_ONLY};
local $ENV{PI_NO_CXX} = 1; # force XS or SWIG binding test
$xhc = PublicInbox::XapClient::start_helper('-j0');
- $test_xhc->($xhc);
+ $test_xhc->($xhc, $csrch);
}
if ('--update') {
diff --git a/t/xap_helper.t b/t/xap_helper.t
index 0f474608..d1394090 100644
--- a/t/xap_helper.t
+++ b/t/xap_helper.t
@@ -204,26 +204,26 @@ for my $n (@NO_CXX) {
$err = do { local $/; <$err_r> };
is $err, "mset.size=6 nr_out=5\n", "got expected status ($xhc->{impl})";
- $r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args,
+ $r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
'dfn:lib/PublicInbox/Search.pm');
chomp((my $hdr, @res) = readline($r));
- is $hdr, 'mset.size=1', "got expected header via mset ($xhc->{impl}";
+ like $hdr, qr/\bmset\.size=1\b/,
+ "got expected header via mset ($xhc->{impl}";
is scalar(@res), 1, 'got one result';
@res = split /\0/, $res[0];
{
my $doc = $v2->search->xdb->get_document($res[0]);
+ ok $doc, 'valid document retrieved';
my @q = PublicInbox::Search::xap_terms('Q', $doc);
is_deeply \@q, [ $mid ], 'docid usable';
}
ok $res[1] > 0 && $res[1] <= 100, 'pct > 0 && <= 100';
- is $res[2], 'XDFID'.$dfid, 'XDFID result matches';
- is $res[3], 'Q'.$mid, 'Q (msgid) mset result matches';
- is scalar(@res), 4, 'only 4 columns in result';
+ is scalar(@res), 2, 'only 2 columns in result';
- $r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args,
+ $r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
'dt:19700101'.'000000..');
chomp(($hdr, @res) = readline($r));
- is $hdr, 'mset.size=6',
+ like $hdr, qr/\bmset\.size=6\b/,
"got expected header via multi-result mset ($xhc->{impl}";
is(scalar(@res), 6, 'got 6 rows');
for my $r (@res) {
@@ -231,17 +231,7 @@ for my $n (@NO_CXX) {
my $doc = $v2->search->xdb->get_document($docid);
ok $pct > 0 && $pct <= 100,
"pct > 0 && <= 100 #$docid ($xhc->{impl})";
- my %terms;
- for (@rest) {
- s/\A([A-Z]+)// or xbail 'no prefix=', \@rest;
- push @{$terms{$1}}, $_;
- }
- while (my ($pfx, $vals) = each %terms) {
- @$vals = sort @$vals;
- my @q = PublicInbox::Search::xap_terms($pfx, $doc);
- is_deeply $vals, \@q,
- "#$docid $pfx as expected ($xhc->{impl})";
- }
+ is scalar(@rest), 0, 'no extra rows returned';
}
my $nr;
for my $i (7, 8, 39, 40) {
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH] www: start wiring up search to use async xap_helper
@ 2024-04-19 12:05 Eric Wong
0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2024-04-19 12:05 UTC (permalink / raw)
To: spew
The C++ version of xap_helper will allow more complex and
expensive queries. Both the Perl and C++-only versions will
allow offloading search into a separate process which can be
killed via ITIMER_REAL or RLIMIT_CPU in the face of overload.
xap_helper is simplified to drop terms+data from the `mset' command.
Retrieving Xapian document terms, data (and possibly values) and
transferring them to the Perl side would be an increase in complexity
on both the Perl and C++ sides. It would require more I/O and
transient memory use on the Perl side, so let's ignore the
document-related stuff here for now for ease-of-development.
---
MANIFEST | 2 ++
lib/PublicInbox/Isearch.pm | 50 +++++++++++++++-----------
lib/PublicInbox/Search.pm | 23 +++++++++++-
lib/PublicInbox/SearchView.pm | 58 +++++++++++++++++-------------
lib/PublicInbox/XapClient.pm | 1 +
lib/PublicInbox/XapHelper.pm | 9 ++---
lib/PublicInbox/XhcMset.pm | 49 +++++++++++++++++++++++++
lib/PublicInbox/XhcMsetIterator.pm | 20 +++++++++++
lib/PublicInbox/xap_helper.h | 4 +--
lib/PublicInbox/xh_mset.h | 31 ++++------------
t/cindex.t | 21 +++++------
t/xap_helper.t | 26 +++++---------
12 files changed, 187 insertions(+), 107 deletions(-)
create mode 100644 lib/PublicInbox/XhcMset.pm
create mode 100644 lib/PublicInbox/XhcMsetIterator.pm
diff --git a/MANIFEST b/MANIFEST
index 4c974338..fb175e5f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -382,6 +382,8 @@ lib/PublicInbox/XapClient.pm
lib/PublicInbox/XapHelper.pm
lib/PublicInbox/XapHelperCxx.pm
lib/PublicInbox/Xapcmd.pm
+lib/PublicInbox/XhcMset.pm
+lib/PublicInbox/XhcMsetIterator.pm
lib/PublicInbox/gcf2_libgit2.h
lib/PublicInbox/xap_helper.h
lib/PublicInbox/xh_cidx.h
diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm
index 62112171..20808d6d 100644
--- a/lib/PublicInbox/Isearch.pm
+++ b/lib/PublicInbox/Isearch.pm
@@ -26,34 +26,44 @@ SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
sub query_approxidate { $_[0]->{es}->query_approxidate($_[1], $_[2]) }
-sub mset {
- my ($self, $str, $opt) = @_;
+sub eidx_mset_prep ($$) {
+ my ($self, $opt) = @_;
my %opt = $opt ? %$opt : ();
$opt{eidx_key} = $self->{eidx_key};
- if (my $uid_range = $opt{uid_range}) {
- my ($beg, $end) = @$uid_range;
- my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
- my $dbh = $self->{es}->over->dbh;
- my $sth = $dbh->prepare_cached(<<'', undef, 1);
+ my $uid_range = $opt{uid_range} or return \%opt;
+ my ($beg, $end) = @$uid_range;
+ my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+ my $dbh = $self->{es}->over->dbh;
+ my $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
- $sth->execute($ibx_id, $beg, $end);
- my @r = ($sth->fetchrow_array);
+ $sth->execute($ibx_id, $beg, $end);
+ my @r = ($sth->fetchrow_array);
- $sth = $dbh->prepare_cached(<<'', undef, 1);
+ $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
- $sth->execute($ibx_id, $beg, $end);
- $r[1] = $sth->fetchrow_array;
- if (defined($r[1]) && defined($r[0])) {
- $opt{limit} = $r[1] - $r[0] + 1;
- } else {
- $r[1] //= $self->{es}->xdb->get_lastdocid;
- $r[0] //= 0;
- }
- $opt{uid_range} = \@r; # these are fed to Xapian and SQLite
+ $sth->execute($ibx_id, $beg, $end);
+ $r[1] = $sth->fetchrow_array;
+ if (defined($r[1]) && defined($r[0])) {
+ $opt{limit} = $r[1] - $r[0] + 1;
+ } else {
+ $r[1] //= $self->{es}->xdb->get_lastdocid;
+ $r[0] //= 0;
}
- $self->{es}->mset($str, \%opt);
+ $opt{uid_range} = \@r; # these are fed to Xapian and SQLite
+ \%opt;
+}
+
+sub mset {
+ my ($self, $str, $opt) = @_;
+ $self->{es}->mset($str, eidx_mset_prep $self, $opt);
+}
+
+sub async_mset {
+ my ($self, $str, $opt, $cb, @args) = @_;
+ $opt = eidx_mset_prep $self, $opt;
+ $self->{es}->async_mset($str, $opt, $cb, @args);
}
sub mset_to_artnums {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 678c8c5d..e83bb4c2 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -11,6 +11,7 @@ our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
use List::Util qw(max);
use POSIX qw(strftime);
use Carp ();
+our $XHC;
# values for searching, changing the numeric value breaks
# compatibility with old indices (so don't change them it)
@@ -89,9 +90,9 @@ our @XH_SPEC = (
'r', # 1=relevance then column
't', # collapse threads
'A=s@', # prefixes
- 'D', # emit docdata
'K=i', # timeout kill after i seconds
'O=s', # eidx_key
+ 'R', # emit rank
'T=i', # threadid
);
@@ -429,6 +430,26 @@ sub mset {
do_enquire($self, $qry, $opt, TS);
}
+sub xhc_start_maybe () {
+ require PublicInbox::XapClient;
+ my $xhc = PublicInbox::XapClient::start_helper();
+ require PublicInbox::XhcMset if $xhc;
+ $xhc;
+}
+
+sub async_mset {
+ my ($self, $qry_str, $opt, $cb, @args) = @_;
+ $XHC //= xhc_start_maybe;
+ if ($XHC) { # unconditionally retrieving pct + rank for now
+ my $rd = $XHC->mkreq(undef, 'mset', xh_args($self), qw(-p -R),
+ $qry_str);
+ PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args);
+ } else {
+ my $mset = $self->mset($qry_str, $opt);
+ $cb->(@args, $mset);
+ }
+}
+
sub do_enquire { # shared with CodeSearch
my ($self, $qry, $opt, $col) = @_;
my $enq = $X{Enquire}->new(xdb($self));
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 2d3e942c..9919e25c 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -30,10 +30,9 @@ sub mbox_results {
sub sres_top_html {
my ($ctx) = @_;
- my $srch = $ctx->{ibx}->isrch or
+ my $srch = $ctx->{srch} = $ctx->{ibx}->isrch or
return PublicInbox::WWW::need($ctx, 'Search');
my $q = PublicInbox::SearchQuery->new($ctx->{qp});
- my $x = $q->{x};
my $o = $q->{o} // 0;
my $asc;
if ($o < 0) {
@@ -41,48 +40,57 @@ sub sres_top_html {
$o = -($o + 1); # so [-1] is the last element, like Perl lists
}
- my $code = 200;
# double the limit for expanded views:
- my $opts = {
+ my $opt = {
limit => $q->{l},
offset => $o,
relevance => $q->{r},
threads => $q->{t},
asc => $asc,
};
- my ($mset, $total, $err, $html);
-retry:
- eval {
- my $query = $q->{'q'};
- $srch->query_approxidate($ctx->{ibx}->git, $query);
- $mset = $srch->mset($query, $opts);
- $total = $mset->get_matches_estimated;
- };
- $err = $@;
+ my $qs = $q->{'q'};
+ $srch->query_approxidate($ctx->{ibx}->git, $qs);
+ sub {
+ $ctx->{wcb} = $_[0]; # PSGI server supplied write cb
+ $srch->async_mset($qs, $opt, \&sres_html_cb, $ctx, $opt, $q);
+ }
+}
+
+sub sres_html_cb { # async_mset cb
+ my ($ctx, $opt, $q, $mset, $err) = @_;
+ my $code = 200;
+ my $total = $mset ? $mset->get_matches_estimated : undef;
ctx_prepare($q, $ctx);
+ my ($res, $html);
if ($err) {
$code = 400;
$html = '<pre>'.err_txt($ctx, $err).'</pre><hr>';
} elsif ($total == 0) {
- if (defined($ctx->{-uxs_retried})) {
- # undo retry damage:
+ if (defined($ctx->{-uxs_retried})) { # undo retry damage:
$q->{'q'} = $ctx->{-uxs_retried};
- } elsif (index($q->{'q'}, '%') >= 0) {
+ } elsif (index($q->{'q'}, '%') >= 0) { # retry unescaped
$ctx->{-uxs_retried} = $q->{'q'};
- $q->{'q'} = uri_unescape($q->{'q'});
- goto retry;
+ my $qs = $q->{'q'} = uri_unescape($q->{'q'});
+ $ctx->{srch}->query_approxidate($ctx->{ibx}->git, $qs);
+ return $ctx->{srch}->async_mset($qs, $opt,
+ \&sres_html_cb, $ctx, $opt, $q);
}
$code = 404;
$html = "<pre>\n[No results found]</pre><hr>";
+ } elsif ($q->{x} eq 'A') {
+ $res = adump($mset, $q, $ctx);
} else {
- return adump($_[0], $mset, $q, $ctx) if $x eq 'A';
-
$ctx->{-html_tip} = search_nav_top($mset, $q, $ctx);
- return mset_thread($ctx, $mset, $q) if $x eq 't';
- mset_summary($ctx, $mset, $q); # appends to {-html_tip}
- $html = '';
+ if ($q->{x} eq 't') {
+ $res = mset_thread($ctx, $mset, $q);
+ } else {
+ mset_summary($ctx, $mset, $q); # appends to {-html_tip}
+ $html = '';
+ }
}
- html_oneshot($ctx, $code, $html);
+ $res //= html_oneshot($ctx, $code, $html);
+ my $wcb = delete $ctx->{wcb};
+ ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res);
}
# display non-nested search results similar to what users expect from
@@ -357,7 +365,7 @@ sub ctx_prepare {
}
sub adump {
- my ($cb, $mset, $q, $ctx) = @_;
+ my ($mset, $q, $ctx) = @_;
$ctx->{ids} = $ctx->{ibx}->isrch->mset_to_artnums($mset);
$ctx->{search_query} = $q; # used by WwwAtomStream::atom_header
PublicInbox::WwwAtomStream->response($ctx, \&adump_i);
diff --git a/lib/PublicInbox/XapClient.pm b/lib/PublicInbox/XapClient.pm
index 98034130..f0270091 100644
--- a/lib/PublicInbox/XapClient.pm
+++ b/lib/PublicInbox/XapClient.pm
@@ -26,6 +26,7 @@ sub mkreq {
}
sub start_helper {
+ $PublicInbox::IPC::send_cmd or return; # can't work w/o SCM_RIGHTS
my @argv = @_;
socketpair(my $sock, my $in, AF_UNIX, SOCK_SEQPACKET, 0);
my $cls = 'PublicInbox::XapHelperCxx';
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index 8c7732f5..e3009720 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -150,11 +150,7 @@ sub mset_iter ($$) {
eval {
my $buf = $it->get_docid;
$buf .= "\0".$it->get_percent if $req->{p};
- my $doc = ($req->{A} || $req->{D}) ? $it->get_document : undef;
- for my $p (@{$req->{A}}) {
- $buf .= "\0".$p.$_ for xap_terms($p, $doc);
- }
- $buf .= "\0".$doc->get_data if $req->{D};
+ $buf .= "\0".$it->get_rank if $req->{R};
say { $req->{0} } $buf;
};
$@ ? iter_retry_check($req) : 0;
@@ -170,7 +166,8 @@ sub cmd_mset { # to be used by WWW + IMAP
$opt->{eidx_key} = $req->{O} if defined $req->{O};
$opt->{threadid} = $req->{T} if defined $req->{T};
my $mset = $req->{srch}->mset($qry_str, $opt);
- say { $req->{0} } 'mset.size=', $mset->size;
+ say { $req->{0} } 'mset.size=', $mset->size,
+ ' .get_matches_estimated=', $mset->get_matches_estimated;
for my $it ($mset->items) {
for (my $t = 10; $t > 0; --$t) {
$t = mset_iter($req, $it) // $t;
diff --git a/lib/PublicInbox/XhcMset.pm b/lib/PublicInbox/XhcMset.pm
new file mode 100644
index 00000000..7f22eec9
--- /dev/null
+++ b/lib/PublicInbox/XhcMset.pm
@@ -0,0 +1,49 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# mocks Xapian::Mset
+package PublicInbox::XhcMset;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::XhcMsetIterator;
+use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
+
+sub event_step {
+ my ($self) = @_;
+ my $rd = $self->{sock};
+ my ($cb, @args) = @{delete $self->{cb_args} // return};
+ eval {
+ my $hdr = <$rd> // die "E: reading mset header: $!";
+ for (split /\s+/, $hdr) { # read mset.size + estimated_matches
+ my ($k, $v) = split /=/, $_, 2;
+ $k =~ s/\A[^\.]*\.//; # s/(mset)?\./
+ $self->{$k} = $v;
+ }
+ $self->{size} // die "E: bad xhc header: `$hdr'";
+ my @it = map {
+ PublicInbox::XhcMsetIterator::make($_)
+ } <$rd>;
+ $self->{items} = \@it;
+ };
+ my $err = $@;
+ $self->close;
+ eval { $cb->(@args, $self, $err) };
+ warn "E: $@\n" if $@;
+}
+
+sub maybe_new {
+ my (undef, $rd, $srch, @cb_args) = @_;
+ my $self = bless { cb_args => \@cb_args, srch => $srch }, __PACKAGE__;
+ if ($PublicInbox::DS::in_loop) {
+ $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT);
+ } else {
+ local $self->{sock} = $rd;
+ event_step($self);
+ }
+}
+
+eval(join('', map { "sub $_ { \$_[0]->{$_} }\n" } qw(size
+ get_matches_estimated)));
+
+sub items { @{$_[0]->{items}} }
+
+1;
diff --git a/lib/PublicInbox/XhcMsetIterator.pm b/lib/PublicInbox/XhcMsetIterator.pm
new file mode 100644
index 00000000..dcfc61e4
--- /dev/null
+++ b/lib/PublicInbox/XhcMsetIterator.pm
@@ -0,0 +1,20 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# mocks Xapian::MsetIterator, there's many of these allocated at once
+package PublicInbox::XhcMsetIterator;
+use v5.12;
+
+sub make ($) {
+ chomp($_[0]);
+ my @self = map { $_ + 0 } split /\0/, $_[0]; # docid, pct, rank
+ # we don't store $xdb in self[4] since we avoid $it->get_document
+ # in favor of $xdb->get_document($it->get_docid)
+ bless \@self, __PACKAGE__;
+}
+
+sub get_docid { $_[0]->[0] }
+sub get_percent { $_[0]->[1] }
+sub get_rank { $_[0]->[2] }
+
+1;
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 3456910b..6fb99cf2 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -142,7 +142,7 @@ struct req { // argv and pfxv point into global rbuf
bool code_search;
bool relevance; // sort by relevance before column
bool emit_percent;
- bool emit_docdata;
+ bool emit_rank;
bool asc; // ascending sort
};
@@ -641,7 +641,6 @@ static void dispatch(struct req *req)
if (MY_ARG_MAX == req->pfxc)
ABORT("too many -A");
break;
- case 'D': req->emit_docdata = true; break;
case 'K':
req->timeout_sec = strtoul(optarg, &end, 10);
if (*end || req->timeout_sec == ULONG_MAX)
@@ -653,6 +652,7 @@ static void dispatch(struct req *req)
if (*end || req->threadid == ULLONG_MAX)
ABORT("-T %s", optarg);
break;
+ case 'R': req->emit_rank = true; break;
default: ABORT("bad switch `-%c'", c);
}
}
diff --git a/lib/PublicInbox/xh_mset.h b/lib/PublicInbox/xh_mset.h
index 4e97a284..33dfef54 100644
--- a/lib/PublicInbox/xh_mset.h
+++ b/lib/PublicInbox/xh_mset.h
@@ -3,20 +3,6 @@
// This file is only intended to be included by xap_helper.h
// it implements pieces used by WWW, IMAP and lei
-static void emit_doc_term(FILE *fp, const char *pfx, Xapian::Document *doc)
-{
- Xapian::TermIterator cur = doc->termlist_begin();
- Xapian::TermIterator end = doc->termlist_end();
- size_t pfx_len = strlen(pfx);
-
- for (cur.skip_to(pfx); cur != end; cur++) {
- std::string tn = *cur;
- if (!starts_with(&tn, pfx, pfx_len)) break;
- fputc(0, fp);
- fwrite(tn.data(), tn.size(), 1, fp);
- }
-}
-
static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off,
Xapian::MSetIterator *i)
{
@@ -24,16 +10,9 @@ static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off,
fprintf(fp, "%llu", (unsigned long long)(*(*i))); // get_docid
if (req->emit_percent)
fprintf(fp, "%c%d", 0, i->get_percent());
- if (req->pfxc || req->emit_docdata) {
- Xapian::Document doc = i->get_document();
- for (int p = 0; p < req->pfxc; p++)
- emit_doc_term(fp, req->pfxv[p], &doc);
- if (req->emit_docdata) {
- std::string d = doc.get_data();
- fputc(0, fp);
- fwrite(d.data(), d.size(), 1, fp);
- }
- }
+ if (req->emit_rank)
+ fprintf(fp, "%c%llu", 0,
+ (unsigned long long)i->get_rank());
fputc('\n', fp);
} catch (const Xapian::DatabaseModifiedError & e) {
req->srch->db->reopen();
@@ -63,7 +42,9 @@ static bool cmd_mset(struct req *req)
Xapian::MSet mset = req->code_search ? commit_mset(req, qry_str) :
mail_mset(req, qry_str);
fbuf_init(&wbuf);
- fprintf(wbuf.fp, "mset.size=%llu\n", (unsigned long long)mset.size());
+ fprintf(wbuf.fp, "mset.size=%llu .get_matches_estimated=%llu\n",
+ (unsigned long long)mset.size(),
+ (unsigned long long)mset.get_matches_estimated());
int fd = fileno(req->fp[0]);
for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) {
off_t off = ftello(wbuf.fp);
diff --git a/t/cindex.t b/t/cindex.t
index e5f26ec3..aabddca8 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -147,26 +147,26 @@ if ('multi-repo search') {
my $test_xhc = sub {
my ($xhc) = @_;
+ my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
my $impl = $xhc->{impl};
my ($r, @l);
- $r = $xhc->mkreq([], qw(mset -D -c -g), $zp_git, @xh_args, 'NUL');
+ $r = $xhc->mkreq([], qw(mset -c -g), $zp_git, @xh_args, 'NUL');
chomp(@l = <$r>);
- is(shift(@l), 'mset.size=2', "got expected header $impl");
+ like shift(@l), qr/\bmset\.size=2\b/, "got expected header $impl";
my %docid2data;
my @got = sort map {
- my @f = split /\0/;
- is scalar(@f), 2, 'got 2 entries';
- $docid2data{$f[0]} = $f[1];
- $f[1];
+ my ($docid, @extra) = split /\0/;
+ is scalar(@extra), 0, 'no extra fields';
+ $docid2data{$docid} =
+ $csrch->xdb->get_document($docid)->get_data;
} @l;
is_deeply(\@got, $exp, "expected doc_data $impl");
$r = $xhc->mkreq([], qw(mset -c -g), "$tmp/wt0/.git", @xh_args, 'NUL');
chomp(@l = <$r>);
- is(shift(@l), 'mset.size=0', "got miss in wrong dir $impl");
+ like shift(@l), qr/\bmset.size=0\b/, "got miss in wrong dir $impl";
is_deeply(\@l, [], "no extra lines $impl");
- my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
while (my ($did, $expect) = each %docid2data) {
is_deeply($csrch->xdb->get_document($did)->get_data,
$expect, "docid=$did data matches");
@@ -179,14 +179,15 @@ SKIP: {
require_mods('+SCM_RIGHTS', 1);
require PublicInbox::XapClient;
my $xhc = PublicInbox::XapClient::start_helper('-j0');
- $test_xhc->($xhc);
+ my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
+ $test_xhc->($xhc, $csrch);
skip 'PI_NO_CXX set', 1 if $ENV{PI_NO_CXX};
$xhc->{impl} =~ /Cxx/ or
skip 'C++ compiler or xapian development libs missing', 1;
skip 'TEST_XH_CXX_ONLY set', 1 if $ENV{TEST_XH_CXX_ONLY};
local $ENV{PI_NO_CXX} = 1; # force XS or SWIG binding test
$xhc = PublicInbox::XapClient::start_helper('-j0');
- $test_xhc->($xhc);
+ $test_xhc->($xhc, $csrch);
}
if ('--update') {
diff --git a/t/xap_helper.t b/t/xap_helper.t
index 0f474608..d1394090 100644
--- a/t/xap_helper.t
+++ b/t/xap_helper.t
@@ -204,26 +204,26 @@ for my $n (@NO_CXX) {
$err = do { local $/; <$err_r> };
is $err, "mset.size=6 nr_out=5\n", "got expected status ($xhc->{impl})";
- $r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args,
+ $r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
'dfn:lib/PublicInbox/Search.pm');
chomp((my $hdr, @res) = readline($r));
- is $hdr, 'mset.size=1', "got expected header via mset ($xhc->{impl}";
+ like $hdr, qr/\bmset\.size=1\b/,
+ "got expected header via mset ($xhc->{impl}";
is scalar(@res), 1, 'got one result';
@res = split /\0/, $res[0];
{
my $doc = $v2->search->xdb->get_document($res[0]);
+ ok $doc, 'valid document retrieved';
my @q = PublicInbox::Search::xap_terms('Q', $doc);
is_deeply \@q, [ $mid ], 'docid usable';
}
ok $res[1] > 0 && $res[1] <= 100, 'pct > 0 && <= 100';
- is $res[2], 'XDFID'.$dfid, 'XDFID result matches';
- is $res[3], 'Q'.$mid, 'Q (msgid) mset result matches';
- is scalar(@res), 4, 'only 4 columns in result';
+ is scalar(@res), 2, 'only 2 columns in result';
- $r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args,
+ $r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
'dt:19700101'.'000000..');
chomp(($hdr, @res) = readline($r));
- is $hdr, 'mset.size=6',
+ like $hdr, qr/\bmset\.size=6\b/,
"got expected header via multi-result mset ($xhc->{impl}";
is(scalar(@res), 6, 'got 6 rows');
for my $r (@res) {
@@ -231,17 +231,7 @@ for my $n (@NO_CXX) {
my $doc = $v2->search->xdb->get_document($docid);
ok $pct > 0 && $pct <= 100,
"pct > 0 && <= 100 #$docid ($xhc->{impl})";
- my %terms;
- for (@rest) {
- s/\A([A-Z]+)// or xbail 'no prefix=', \@rest;
- push @{$terms{$1}}, $_;
- }
- while (my ($pfx, $vals) = each %terms) {
- @$vals = sort @$vals;
- my @q = PublicInbox::Search::xap_terms($pfx, $doc);
- is_deeply $vals, \@q,
- "#$docid $pfx as expected ($xhc->{impl})";
- }
+ is scalar(@rest), 0, 'no extra rows returned';
}
my $nr;
for my $i (7, 8, 39, 40) {
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-04-19 12:05 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-19 12:05 [PATCH] www: start wiring up search to use async xap_helper Eric Wong
-- strict thread matches above, loose matches on Subject: below --
2024-04-19 11:56 Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).