dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH] xap_helper: drop terms+data from `mset' command
@ 2024-04-19  1:18 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2024-04-19  1:18 UTC (permalink / raw)
  To: spew

Retrieving Xapian document terms, data (and possibly values) and
transferring them to the Perl side would increase complexity
on both the Perl and C++ sides.  It would require more I/O and
transient memory use on the Perl side, so let's ignore the
document-related stuff here for now for ease-of-development.

We can reconsider this change if dropping Xapian Perl bindings
entirely and relying on JAOT C++ ever becomes a possibility.
---
 lib/PublicInbox/XapHelper.pm |  5 -----
 lib/PublicInbox/xh_mset.h    | 10 ----------
 t/cindex.t                   | 17 +++++++++--------
 t/xap_helper.t               | 21 +++++----------------
 4 files changed, 14 insertions(+), 39 deletions(-)

diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index 143853cd..ea13dce1 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -151,11 +151,6 @@ sub mset_iter ($$) {
 		my $buf = $it->get_docid;
 		$buf .= "\0".$it->get_percent if $req->{p};
 		$buf .= "\0".$it->get_rank if $req->{R};
-		my $doc = ($req->{A} || $req->{D}) ? $it->get_document : undef;
-		for my $p (@{$req->{A}}) {
-			$buf .= "\0".$p.$_ for xap_terms($p, $doc);
-		}
-		$buf .= "\0".$doc->get_data if $req->{D};
 		say { $req->{0} } $buf;
 	};
 	$@ ? iter_retry_check($req) : 0;
diff --git a/lib/PublicInbox/xh_mset.h b/lib/PublicInbox/xh_mset.h
index 742b7811..69921687 100644
--- a/lib/PublicInbox/xh_mset.h
+++ b/lib/PublicInbox/xh_mset.h
@@ -27,16 +27,6 @@ static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off,
 		if (req->emit_rank)
 			fprintf(fp, "%c%llu", 0,
 				(unsigned long long)i->get_rank());
-		if (req->pfxc || req->emit_docdata) {
-			Xapian::Document doc = i->get_document();
-			for (int p = 0; p < req->pfxc; p++)
-				emit_doc_term(fp, req->pfxv[p], &doc);
-			if (req->emit_docdata) {
-				std::string d = doc.get_data();
-				fputc(0, fp);
-				fwrite(d.data(), d.size(), 1, fp);
-			}
-		}
 		fputc('\n', fp);
 	} catch (const Xapian::DatabaseModifiedError & e) {
 		req->srch->db->reopen();
diff --git a/t/cindex.t b/t/cindex.t
index d3e79197..aabddca8 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -147,17 +147,18 @@ if ('multi-repo search') {
 
 my $test_xhc = sub {
 	my ($xhc) = @_;
+	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
 	my $impl = $xhc->{impl};
 	my ($r, @l);
-	$r = $xhc->mkreq([], qw(mset -D -c -g), $zp_git, @xh_args, 'NUL');
+	$r = $xhc->mkreq([], qw(mset -c -g), $zp_git, @xh_args, 'NUL');
 	chomp(@l = <$r>);
 	like shift(@l), qr/\bmset\.size=2\b/, "got expected header $impl";
 	my %docid2data;
 	my @got = sort map {
-		my @f = split /\0/;
-		is scalar(@f), 2, 'got 2 entries';
-		$docid2data{$f[0]} = $f[1];
-		$f[1];
+		my ($docid, @extra) = split /\0/;
+		is scalar(@extra), 0, 'no extra fields';
+		$docid2data{$docid} =
+			$csrch->xdb->get_document($docid)->get_data;
 	} @l;
 	is_deeply(\@got, $exp, "expected doc_data $impl");
 
@@ -166,7 +167,6 @@ my $test_xhc = sub {
 	like shift(@l), qr/\bmset.size=0\b/, "got miss in wrong dir $impl";
 	is_deeply(\@l, [], "no extra lines $impl");
 
-	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
 	while (my ($did, $expect) = each %docid2data) {
 		is_deeply($csrch->xdb->get_document($did)->get_data,
 			$expect, "docid=$did data matches");
@@ -179,14 +179,15 @@ SKIP: {
 	require_mods('+SCM_RIGHTS', 1);
 	require PublicInbox::XapClient;
 	my $xhc = PublicInbox::XapClient::start_helper('-j0');
-	$test_xhc->($xhc);
+	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
+	$test_xhc->($xhc, $csrch);
 	skip 'PI_NO_CXX set', 1 if $ENV{PI_NO_CXX};
 	$xhc->{impl} =~ /Cxx/ or
 		skip 'C++ compiler or xapian development libs missing', 1;
 	skip 'TEST_XH_CXX_ONLY set', 1 if $ENV{TEST_XH_CXX_ONLY};
 	local $ENV{PI_NO_CXX} = 1; # force XS or SWIG binding test
 	$xhc = PublicInbox::XapClient::start_helper('-j0');
-	$test_xhc->($xhc);
+	$test_xhc->($xhc, $csrch);
 }
 
 if ('--update') {
diff --git a/t/xap_helper.t b/t/xap_helper.t
index c2fec6fc..d1394090 100644
--- a/t/xap_helper.t
+++ b/t/xap_helper.t
@@ -204,7 +204,7 @@ for my $n (@NO_CXX) {
 	$err = do { local $/; <$err_r> };
 	is $err, "mset.size=6 nr_out=5\n", "got expected status ($xhc->{impl})";
 
-	$r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args,
+	$r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
 				'dfn:lib/PublicInbox/Search.pm');
 	chomp((my $hdr, @res) = readline($r));
 	like $hdr, qr/\bmset\.size=1\b/,
@@ -213,15 +213,14 @@ for my $n (@NO_CXX) {
 	@res = split /\0/, $res[0];
 	{
 		my $doc = $v2->search->xdb->get_document($res[0]);
+		ok $doc, 'valid document retrieved';
 		my @q = PublicInbox::Search::xap_terms('Q', $doc);
 		is_deeply \@q, [ $mid ], 'docid usable';
 	}
 	ok $res[1] > 0 && $res[1] <= 100, 'pct > 0 && <= 100';
-	is $res[2], 'XDFID'.$dfid, 'XDFID result matches';
-	is $res[3], 'Q'.$mid, 'Q (msgid) mset result matches';
-	is scalar(@res), 4, 'only 4 columns in result';
+	is scalar(@res), 2, 'only 2 columns in result';
 
-	$r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args,
+	$r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
 				'dt:19700101'.'000000..');
 	chomp(($hdr, @res) = readline($r));
 	like $hdr, qr/\bmset\.size=6\b/,
@@ -232,17 +231,7 @@ for my $n (@NO_CXX) {
 		my $doc = $v2->search->xdb->get_document($docid);
 		ok $pct > 0 && $pct <= 100,
 			"pct > 0 && <= 100 #$docid ($xhc->{impl})";
-		my %terms;
-		for (@rest) {
-			s/\A([A-Z]+)// or xbail 'no prefix=', \@rest;
-			push @{$terms{$1}}, $_;
-		}
-		while (my ($pfx, $vals) = each %terms) {
-			@$vals = sort @$vals;
-			my @q = PublicInbox::Search::xap_terms($pfx, $doc);
-			is_deeply $vals, \@q,
-				"#$docid $pfx as expected ($xhc->{impl})";
-		}
+		is scalar(@rest), 0, 'no extra rows returned';
 	}
 	my $nr;
 	for my $i (7, 8, 39, 40) {

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2024-04-19  1:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-19  1:18 [PATCH] xap_helper: drop terms+data from `mset' command Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).