dumping ground for random patches and texts
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] www: start wiring up search to use async xap_helper
Date: Fri, 19 Apr 2024 12:05:55 +0000	[thread overview]
Message-ID: <20240419120555.4139653-1-e@80x24.org> (raw)

The C++ version of xap_helper will allow more complex and
expensive queries.  Both the Perl and C++-only versions will
allow offloading search into a separate process which can be
killed via ITIMER_REAL or RLIMIT_CPU in the face of overload.

xap_helper is simplified to drop terms+data from the `mset' command.
Retrieving Xapian document terms, data (and possibly values) and
transferring to the Perl side would be an increase in complexity
on both the Perl and C++ sides.  It would require more I/O and
transient memory use on the Perl side, so let's ignore the
document-related stuff here for now for ease-of-development.
---
 MANIFEST                           |  2 ++
 lib/PublicInbox/Isearch.pm         | 50 +++++++++++++++-----------
 lib/PublicInbox/Search.pm          | 23 +++++++++++-
 lib/PublicInbox/SearchView.pm      | 58 +++++++++++++++++-------------
 lib/PublicInbox/XapClient.pm       |  1 +
 lib/PublicInbox/XapHelper.pm       |  9 ++---
 lib/PublicInbox/XhcMset.pm         | 49 +++++++++++++++++++++++++
 lib/PublicInbox/XhcMsetIterator.pm | 20 +++++++++++
 lib/PublicInbox/xap_helper.h       |  4 +--
 lib/PublicInbox/xh_mset.h          | 31 ++++------------
 t/cindex.t                         | 21 +++++------
 t/xap_helper.t                     | 26 +++++---------
 12 files changed, 187 insertions(+), 107 deletions(-)
 create mode 100644 lib/PublicInbox/XhcMset.pm
 create mode 100644 lib/PublicInbox/XhcMsetIterator.pm

diff --git a/MANIFEST b/MANIFEST
index 4c974338..fb175e5f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -382,6 +382,8 @@ lib/PublicInbox/XapClient.pm
 lib/PublicInbox/XapHelper.pm
 lib/PublicInbox/XapHelperCxx.pm
 lib/PublicInbox/Xapcmd.pm
+lib/PublicInbox/XhcMset.pm
+lib/PublicInbox/XhcMsetIterator.pm
 lib/PublicInbox/gcf2_libgit2.h
 lib/PublicInbox/xap_helper.h
 lib/PublicInbox/xh_cidx.h
diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm
index 62112171..20808d6d 100644
--- a/lib/PublicInbox/Isearch.pm
+++ b/lib/PublicInbox/Isearch.pm
@@ -26,34 +26,44 @@ SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
 
 sub query_approxidate { $_[0]->{es}->query_approxidate($_[1], $_[2]) }
 
-sub mset {
-	my ($self, $str, $opt) = @_;
+sub eidx_mset_prep ($$) {
+	my ($self, $opt) = @_;
 	my %opt = $opt ? %$opt : ();
 	$opt{eidx_key} = $self->{eidx_key};
-	if (my $uid_range = $opt{uid_range}) {
-		my ($beg, $end) = @$uid_range;
-		my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
-		my $dbh = $self->{es}->over->dbh;
-		my $sth = $dbh->prepare_cached(<<'', undef, 1);
+	my $uid_range = $opt{uid_range} or return \%opt;
+	my ($beg, $end) = @$uid_range;
+	my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+	my $dbh = $self->{es}->over->dbh;
+	my $sth = $dbh->prepare_cached(<<'', undef, 1);
 SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
 
-		$sth->execute($ibx_id, $beg, $end);
-		my @r = ($sth->fetchrow_array);
+	$sth->execute($ibx_id, $beg, $end);
+	my @r = ($sth->fetchrow_array);
 
-		$sth = $dbh->prepare_cached(<<'', undef, 1);
+	$sth = $dbh->prepare_cached(<<'', undef, 1);
 SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
 
-		$sth->execute($ibx_id, $beg, $end);
-		$r[1] = $sth->fetchrow_array;
-		if (defined($r[1]) && defined($r[0])) {
-			$opt{limit} = $r[1] - $r[0] + 1;
-		} else {
-			$r[1] //= $self->{es}->xdb->get_lastdocid;
-			$r[0] //= 0;
-		}
-		$opt{uid_range} = \@r; # these are fed to Xapian and SQLite
+	$sth->execute($ibx_id, $beg, $end);
+	$r[1] = $sth->fetchrow_array;
+	if (defined($r[1]) && defined($r[0])) {
+		$opt{limit} = $r[1] - $r[0] + 1;
+	} else {
+		$r[1] //= $self->{es}->xdb->get_lastdocid;
+		$r[0] //= 0;
 	}
-	$self->{es}->mset($str, \%opt);
+	$opt{uid_range} = \@r; # these are fed to Xapian and SQLite
+	\%opt;
+}
+
+sub mset {
+	my ($self, $str, $opt) = @_;
+	$self->{es}->mset($str, eidx_mset_prep $self, $opt);
+}
+
+sub async_mset {
+	my ($self, $str, $opt, $cb, @args) = @_;
+	$opt = eidx_mset_prep $self, $opt;
+	$self->{es}->async_mset($str, $opt, $cb, @args);
 }
 
 sub mset_to_artnums {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 678c8c5d..e83bb4c2 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -11,6 +11,7 @@ our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
 use List::Util qw(max);
 use POSIX qw(strftime);
 use Carp ();
+our $XHC;
 
 # values for searching, changing the numeric value breaks
 # compatibility with old indices (so don't change them it)
@@ -89,9 +90,9 @@ our @XH_SPEC = (
 	'r', # 1=relevance then column
 	't', # collapse threads
 	'A=s@', # prefixes
-	'D', # emit docdata
 	'K=i', # timeout kill after i seconds
 	'O=s', # eidx_key
+	'R', # emit rank
 	'T=i', # threadid
 );
 
@@ -429,6 +430,26 @@ sub mset {
 	do_enquire($self, $qry, $opt, TS);
 }
 
+sub xhc_start_maybe () {
+	require PublicInbox::XapClient;
+	my $xhc = PublicInbox::XapClient::start_helper();
+	require PublicInbox::XhcMset if $xhc;
+	$xhc;
+}
+
+sub async_mset {
+	my ($self, $qry_str, $opt, $cb, @args) = @_;
+	$XHC //= xhc_start_maybe;
+	if ($XHC) { # unconditionally retrieving pct + rank for now
+		my $rd = $XHC->mkreq(undef, 'mset', xh_args($self), qw(-p -R),
+					$qry_str);
+		PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args);
+	} else {
+		my $mset = $self->mset($qry_str, $opt);
+		$cb->(@args, $mset);
+	}
+}
+
 sub do_enquire { # shared with CodeSearch
 	my ($self, $qry, $opt, $col) = @_;
 	my $enq = $X{Enquire}->new(xdb($self));
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 2d3e942c..9919e25c 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -30,10 +30,9 @@ sub mbox_results {
 
 sub sres_top_html {
 	my ($ctx) = @_;
-	my $srch = $ctx->{ibx}->isrch or
+	my $srch = $ctx->{srch} = $ctx->{ibx}->isrch or
 		return PublicInbox::WWW::need($ctx, 'Search');
 	my $q = PublicInbox::SearchQuery->new($ctx->{qp});
-	my $x = $q->{x};
 	my $o = $q->{o} // 0;
 	my $asc;
 	if ($o < 0) {
@@ -41,48 +40,57 @@ sub sres_top_html {
 		$o = -($o + 1); # so [-1] is the last element, like Perl lists
 	}
 
-	my $code = 200;
 	# double the limit for expanded views:
-	my $opts = {
+	my $opt = {
 		limit => $q->{l},
 		offset => $o,
 		relevance => $q->{r},
 		threads => $q->{t},
 		asc => $asc,
 	};
-	my ($mset, $total, $err, $html);
-retry:
-	eval {
-		my $query = $q->{'q'};
-		$srch->query_approxidate($ctx->{ibx}->git, $query);
-		$mset = $srch->mset($query, $opts);
-		$total = $mset->get_matches_estimated;
-	};
-	$err = $@;
+	my $qs = $q->{'q'};
+	$srch->query_approxidate($ctx->{ibx}->git, $qs);
+	sub {
+		$ctx->{wcb} = $_[0]; # PSGI server supplied write cb
+		$srch->async_mset($qs, $opt, \&sres_html_cb, $ctx, $opt, $q);
+	}
+}
+
+sub sres_html_cb { # async_mset cb
+	my ($ctx, $opt, $q, $mset, $err) = @_;
+	my $code = 200;
+	my $total = $mset ? $mset->get_matches_estimated : undef;
 	ctx_prepare($q, $ctx);
+	my ($res, $html);
 	if ($err) {
 		$code = 400;
 		$html = '<pre>'.err_txt($ctx, $err).'</pre><hr>';
 	} elsif ($total == 0) {
-		if (defined($ctx->{-uxs_retried})) {
-			# undo retry damage:
+		if (defined($ctx->{-uxs_retried})) { # undo retry damage:
 			$q->{'q'} = $ctx->{-uxs_retried};
-		} elsif (index($q->{'q'}, '%') >= 0) {
+		} elsif (index($q->{'q'}, '%') >= 0) { # retry unescaped
 			$ctx->{-uxs_retried} = $q->{'q'};
-			$q->{'q'} = uri_unescape($q->{'q'});
-			goto retry;
+			my $qs = $q->{'q'} = uri_unescape($q->{'q'});
+			$ctx->{srch}->query_approxidate($ctx->{ibx}->git, $qs);
+			return $ctx->{srch}->async_mset($qs, $opt,
+						\&sres_html_cb, $ctx, $opt, $q);
 		}
 		$code = 404;
 		$html = "<pre>\n[No results found]</pre><hr>";
+	} elsif ($q->{x} eq 'A') {
+		$res = adump($mset, $q, $ctx);
 	} else {
-		return adump($_[0], $mset, $q, $ctx) if $x eq 'A';
-
 		$ctx->{-html_tip} = search_nav_top($mset, $q, $ctx);
-		return mset_thread($ctx, $mset, $q) if $x eq 't';
-		mset_summary($ctx, $mset, $q); # appends to {-html_tip}
-		$html = '';
+		if ($q->{x} eq 't') {
+			$res = mset_thread($ctx, $mset, $q);
+		} else {
+			mset_summary($ctx, $mset, $q); # appends to {-html_tip}
+			$html = '';
+		}
 	}
-	html_oneshot($ctx, $code, $html);
+	$res //= html_oneshot($ctx, $code, $html);
+	my $wcb = delete $ctx->{wcb};
+	ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res);
 }
 
 # display non-nested search results similar to what users expect from
@@ -357,7 +365,7 @@ sub ctx_prepare {
 }
 
 sub adump {
-	my ($cb, $mset, $q, $ctx) = @_;
+	my ($mset, $q, $ctx) = @_;
 	$ctx->{ids} = $ctx->{ibx}->isrch->mset_to_artnums($mset);
 	$ctx->{search_query} = $q; # used by WwwAtomStream::atom_header
 	PublicInbox::WwwAtomStream->response($ctx, \&adump_i);
diff --git a/lib/PublicInbox/XapClient.pm b/lib/PublicInbox/XapClient.pm
index 98034130..f0270091 100644
--- a/lib/PublicInbox/XapClient.pm
+++ b/lib/PublicInbox/XapClient.pm
@@ -26,6 +26,7 @@ sub mkreq {
 }
 
 sub start_helper {
+	$PublicInbox::IPC::send_cmd or return; # can't work w/o SCM_RIGHTS
 	my @argv = @_;
 	socketpair(my $sock, my $in, AF_UNIX, SOCK_SEQPACKET, 0);
 	my $cls = 'PublicInbox::XapHelperCxx';
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index 8c7732f5..e3009720 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -150,11 +150,7 @@ sub mset_iter ($$) {
 	eval {
 		my $buf = $it->get_docid;
 		$buf .= "\0".$it->get_percent if $req->{p};
-		my $doc = ($req->{A} || $req->{D}) ? $it->get_document : undef;
-		for my $p (@{$req->{A}}) {
-			$buf .= "\0".$p.$_ for xap_terms($p, $doc);
-		}
-		$buf .= "\0".$doc->get_data if $req->{D};
+		$buf .= "\0".$it->get_rank if $req->{R};
 		say { $req->{0} } $buf;
 	};
 	$@ ? iter_retry_check($req) : 0;
@@ -170,7 +166,8 @@ sub cmd_mset { # to be used by WWW + IMAP
 	$opt->{eidx_key} = $req->{O} if defined $req->{O};
 	$opt->{threadid} = $req->{T} if defined $req->{T};
 	my $mset = $req->{srch}->mset($qry_str, $opt);
-	say { $req->{0} } 'mset.size=', $mset->size;
+	say { $req->{0} } 'mset.size=', $mset->size,
+		' .get_matches_estimated=', $mset->get_matches_estimated;
 	for my $it ($mset->items) {
 		for (my $t = 10; $t > 0; --$t) {
 			$t = mset_iter($req, $it) // $t;
diff --git a/lib/PublicInbox/XhcMset.pm b/lib/PublicInbox/XhcMset.pm
new file mode 100644
index 00000000..7f22eec9
--- /dev/null
+++ b/lib/PublicInbox/XhcMset.pm
@@ -0,0 +1,49 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# mocks Xapian::Mset
+package PublicInbox::XhcMset;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::XhcMsetIterator;
+use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
+
+sub event_step {
+	my ($self) = @_;
+	my $rd = $self->{sock};
+	my ($cb, @args) = @{delete $self->{cb_args} // return};
+	eval {
+		my $hdr = <$rd> // die "E: reading mset header: $!";
+		for (split /\s+/, $hdr) { # read mset.size + estimated_matches
+			my ($k, $v) = split /=/, $_, 2;
+			$k =~ s/\A[^\.]*\.//; # s/(mset)?\./
+			$self->{$k} = $v;
+		}
+		$self->{size} // die "E: bad xhc header: `$hdr'";
+		my @it = map {
+			PublicInbox::XhcMsetIterator::make($_)
+		} <$rd>;
+		$self->{items} = \@it;
+	};
+	my $err = $@;
+	$self->close;
+	eval { $cb->(@args, $self, $err) };
+	warn "E: $@\n" if $@;
+}
+
+sub maybe_new {
+	my (undef, $rd, $srch, @cb_args) = @_;
+	my $self = bless { cb_args => \@cb_args, srch => $srch }, __PACKAGE__;
+	if ($PublicInbox::DS::in_loop) {
+		$self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT);
+	} else {
+		local $self->{sock} = $rd;
+		event_step($self);
+	}
+}
+
+eval(join('', map { "sub $_ { \$_[0]->{$_} }\n" } qw(size
+	get_matches_estimated)));
+
+sub items { @{$_[0]->{items}} }
+
+1;
diff --git a/lib/PublicInbox/XhcMsetIterator.pm b/lib/PublicInbox/XhcMsetIterator.pm
new file mode 100644
index 00000000..dcfc61e4
--- /dev/null
+++ b/lib/PublicInbox/XhcMsetIterator.pm
@@ -0,0 +1,20 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# mocks Xapian::MsetIterator, there's many of these allocated at once
+package PublicInbox::XhcMsetIterator;
+use v5.12;
+
+sub make ($) {
+	chomp($_[0]);
+	my @self = map { $_ + 0 } split /\0/, $_[0]; # docid, pct, rank
+	# we don't store $xdb in self[4] since we avoid $it->get_document
+	# in favor of $xdb->get_document($it->get_docid)
+	bless \@self, __PACKAGE__;
+}
+
+sub get_docid { $_[0]->[0] }
+sub get_percent { $_[0]->[1] }
+sub get_rank { $_[0]->[2] }
+
+1;
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 3456910b..6fb99cf2 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -142,7 +142,7 @@ struct req { // argv and pfxv point into global rbuf
 	bool code_search;
 	bool relevance; // sort by relevance before column
 	bool emit_percent;
-	bool emit_docdata;
+	bool emit_rank;
 	bool asc; // ascending sort
 };
 
@@ -641,7 +641,6 @@ static void dispatch(struct req *req)
 			if (MY_ARG_MAX == req->pfxc)
 				ABORT("too many -A");
 			break;
-		case 'D': req->emit_docdata = true; break;
 		case 'K':
 			req->timeout_sec = strtoul(optarg, &end, 10);
 			if (*end || req->timeout_sec == ULONG_MAX)
@@ -653,6 +652,7 @@ static void dispatch(struct req *req)
 			if (*end || req->threadid == ULLONG_MAX)
 				ABORT("-T %s", optarg);
 			break;
+		case 'R': req->emit_rank = true; break;
 		default: ABORT("bad switch `-%c'", c);
 		}
 	}
diff --git a/lib/PublicInbox/xh_mset.h b/lib/PublicInbox/xh_mset.h
index 4e97a284..33dfef54 100644
--- a/lib/PublicInbox/xh_mset.h
+++ b/lib/PublicInbox/xh_mset.h
@@ -3,20 +3,6 @@
 // This file is only intended to be included by xap_helper.h
 // it implements pieces used by WWW, IMAP and lei
 
-static void emit_doc_term(FILE *fp, const char *pfx, Xapian::Document *doc)
-{
-	Xapian::TermIterator cur = doc->termlist_begin();
-	Xapian::TermIterator end = doc->termlist_end();
-	size_t pfx_len = strlen(pfx);
-
-	for (cur.skip_to(pfx); cur != end; cur++) {
-		std::string tn = *cur;
-		if (!starts_with(&tn, pfx, pfx_len)) break;
-		fputc(0, fp);
-		fwrite(tn.data(), tn.size(), 1, fp);
-	}
-}
-
 static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off,
 				Xapian::MSetIterator *i)
 {
@@ -24,16 +10,9 @@ static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off,
 		fprintf(fp, "%llu", (unsigned long long)(*(*i))); // get_docid
 		if (req->emit_percent)
 			fprintf(fp, "%c%d", 0, i->get_percent());
-		if (req->pfxc || req->emit_docdata) {
-			Xapian::Document doc = i->get_document();
-			for (int p = 0; p < req->pfxc; p++)
-				emit_doc_term(fp, req->pfxv[p], &doc);
-			if (req->emit_docdata) {
-				std::string d = doc.get_data();
-				fputc(0, fp);
-				fwrite(d.data(), d.size(), 1, fp);
-			}
-		}
+		if (req->emit_rank)
+			fprintf(fp, "%c%llu", 0,
+				(unsigned long long)i->get_rank());
 		fputc('\n', fp);
 	} catch (const Xapian::DatabaseModifiedError & e) {
 		req->srch->db->reopen();
@@ -63,7 +42,9 @@ static bool cmd_mset(struct req *req)
 	Xapian::MSet mset = req->code_search ? commit_mset(req, qry_str) :
 						mail_mset(req, qry_str);
 	fbuf_init(&wbuf);
-	fprintf(wbuf.fp, "mset.size=%llu\n", (unsigned long long)mset.size());
+	fprintf(wbuf.fp, "mset.size=%llu .get_matches_estimated=%llu\n",
+		(unsigned long long)mset.size(),
+		(unsigned long long)mset.get_matches_estimated());
 	int fd = fileno(req->fp[0]);
 	for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) {
 		off_t off = ftello(wbuf.fp);
diff --git a/t/cindex.t b/t/cindex.t
index e5f26ec3..aabddca8 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -147,26 +147,26 @@ if ('multi-repo search') {
 
 my $test_xhc = sub {
 	my ($xhc) = @_;
+	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
 	my $impl = $xhc->{impl};
 	my ($r, @l);
-	$r = $xhc->mkreq([], qw(mset -D -c -g), $zp_git, @xh_args, 'NUL');
+	$r = $xhc->mkreq([], qw(mset -c -g), $zp_git, @xh_args, 'NUL');
 	chomp(@l = <$r>);
-	is(shift(@l), 'mset.size=2', "got expected header $impl");
+	like shift(@l), qr/\bmset\.size=2\b/, "got expected header $impl";
 	my %docid2data;
 	my @got = sort map {
-		my @f = split /\0/;
-		is scalar(@f), 2, 'got 2 entries';
-		$docid2data{$f[0]} = $f[1];
-		$f[1];
+		my ($docid, @extra) = split /\0/;
+		is scalar(@extra), 0, 'no extra fields';
+		$docid2data{$docid} =
+			$csrch->xdb->get_document($docid)->get_data;
 	} @l;
 	is_deeply(\@got, $exp, "expected doc_data $impl");
 
 	$r = $xhc->mkreq([], qw(mset -c -g), "$tmp/wt0/.git", @xh_args, 'NUL');
 	chomp(@l = <$r>);
-	is(shift(@l), 'mset.size=0', "got miss in wrong dir $impl");
+	like shift(@l), qr/\bmset.size=0\b/, "got miss in wrong dir $impl";
 	is_deeply(\@l, [], "no extra lines $impl");
 
-	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
 	while (my ($did, $expect) = each %docid2data) {
 		is_deeply($csrch->xdb->get_document($did)->get_data,
 			$expect, "docid=$did data matches");
@@ -179,14 +179,15 @@ SKIP: {
 	require_mods('+SCM_RIGHTS', 1);
 	require PublicInbox::XapClient;
 	my $xhc = PublicInbox::XapClient::start_helper('-j0');
-	$test_xhc->($xhc);
+	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
+	$test_xhc->($xhc, $csrch);
 	skip 'PI_NO_CXX set', 1 if $ENV{PI_NO_CXX};
 	$xhc->{impl} =~ /Cxx/ or
 		skip 'C++ compiler or xapian development libs missing', 1;
 	skip 'TEST_XH_CXX_ONLY set', 1 if $ENV{TEST_XH_CXX_ONLY};
 	local $ENV{PI_NO_CXX} = 1; # force XS or SWIG binding test
 	$xhc = PublicInbox::XapClient::start_helper('-j0');
-	$test_xhc->($xhc);
+	$test_xhc->($xhc, $csrch);
 }
 
 if ('--update') {
diff --git a/t/xap_helper.t b/t/xap_helper.t
index 0f474608..d1394090 100644
--- a/t/xap_helper.t
+++ b/t/xap_helper.t
@@ -204,26 +204,26 @@ for my $n (@NO_CXX) {
 	$err = do { local $/; <$err_r> };
 	is $err, "mset.size=6 nr_out=5\n", "got expected status ($xhc->{impl})";
 
-	$r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args,
+	$r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
 				'dfn:lib/PublicInbox/Search.pm');
 	chomp((my $hdr, @res) = readline($r));
-	is $hdr, 'mset.size=1', "got expected header via mset ($xhc->{impl}";
+	like $hdr, qr/\bmset\.size=1\b/,
+		"got expected header via mset ($xhc->{impl}";
 	is scalar(@res), 1, 'got one result';
 	@res = split /\0/, $res[0];
 	{
 		my $doc = $v2->search->xdb->get_document($res[0]);
+		ok $doc, 'valid document retrieved';
 		my @q = PublicInbox::Search::xap_terms('Q', $doc);
 		is_deeply \@q, [ $mid ], 'docid usable';
 	}
 	ok $res[1] > 0 && $res[1] <= 100, 'pct > 0 && <= 100';
-	is $res[2], 'XDFID'.$dfid, 'XDFID result matches';
-	is $res[3], 'Q'.$mid, 'Q (msgid) mset result matches';
-	is scalar(@res), 4, 'only 4 columns in result';
+	is scalar(@res), 2, 'only 2 columns in result';
 
-	$r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args,
+	$r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
 				'dt:19700101'.'000000..');
 	chomp(($hdr, @res) = readline($r));
-	is $hdr, 'mset.size=6',
+	like $hdr, qr/\bmset\.size=6\b/,
 		"got expected header via multi-result mset ($xhc->{impl}";
 	is(scalar(@res), 6, 'got 6 rows');
 	for my $r (@res) {
@@ -231,17 +231,7 @@ for my $n (@NO_CXX) {
 		my $doc = $v2->search->xdb->get_document($docid);
 		ok $pct > 0 && $pct <= 100,
 			"pct > 0 && <= 100 #$docid ($xhc->{impl})";
-		my %terms;
-		for (@rest) {
-			s/\A([A-Z]+)// or xbail 'no prefix=', \@rest;
-			push @{$terms{$1}}, $_;
-		}
-		while (my ($pfx, $vals) = each %terms) {
-			@$vals = sort @$vals;
-			my @q = PublicInbox::Search::xap_terms($pfx, $doc);
-			is_deeply $vals, \@q,
-				"#$docid $pfx as expected ($xhc->{impl})";
-		}
+		is scalar(@rest), 0, 'no extra rows returned';
 	}
 	my $nr;
 	for my $i (7, 8, 39, 40) {

             reply	other threads:[~2024-04-19 12:05 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-19 12:05 Eric Wong [this message]
  -- strict thread matches above, loose matches on Subject: below --
2024-04-19 11:56 [PATCH] www: start wiring up search to use async xap_helper Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240419120555.4139653-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=spew@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).