From 8e4426e3ff9e3dde861d4722b73b5a21c42910d4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 24 Apr 2024 06:44:45 +0000 Subject: xap_helper: drop terms+data from `mset' command Retrieving Xapian document terms, data (and possibly values) and transferring to the Perl side would be an increase in complexity and I/O on both the Perl and C++ sides. It would require more I/O in C++ and transient memory use on the Perl side where slow mset iteration gives an opportunity to dictate memory release rate. So let's ignore the document-related stuff here for now for ease-of-development. We can reconsider this change if dropping Xapian Perl bindings entirely and relying on JAOT C++ ever becomes a possibility. --- t/cindex.t | 17 +++++++++-------- t/xap_helper.t | 21 +++++---------------- 2 files changed, 14 insertions(+), 24 deletions(-) (limited to 't') diff --git a/t/cindex.t b/t/cindex.t index e5f26ec3..acd74a5d 100644 --- a/t/cindex.t +++ b/t/cindex.t @@ -147,17 +147,18 @@ if ('multi-repo search') { my $test_xhc = sub { my ($xhc) = @_; + my $csrch = PublicInbox::CodeSearch->new("$tmp/ext"); my $impl = $xhc->{impl}; my ($r, @l); - $r = $xhc->mkreq([], qw(mset -D -c -g), $zp_git, @xh_args, 'NUL'); + $r = $xhc->mkreq([], qw(mset -c -g), $zp_git, @xh_args, 'NUL'); chomp(@l = <$r>); is(shift(@l), 'mset.size=2', "got expected header $impl"); my %docid2data; my @got = sort map { - my @f = split /\0/; - is scalar(@f), 2, 'got 2 entries'; - $docid2data{$f[0]} = $f[1]; - $f[1]; + my ($docid, @extra) = split /\0/; + is scalar(@extra), 0, 'no extra fields'; + $docid2data{$docid} = + $csrch->xdb->get_document($docid)->get_data; } @l; is_deeply(\@got, $exp, "expected doc_data $impl"); @@ -166,7 +167,6 @@ my $test_xhc = sub { is(shift(@l), 'mset.size=0', "got miss in wrong dir $impl"); is_deeply(\@l, [], "no extra lines $impl"); - my $csrch = PublicInbox::CodeSearch->new("$tmp/ext"); while (my ($did, $expect) = each %docid2data) { is_deeply($csrch->xdb->get_document($did)->get_data, 
$expect, "docid=$did data matches"); @@ -179,14 +179,15 @@ SKIP: { require_mods('+SCM_RIGHTS', 1); require PublicInbox::XapClient; my $xhc = PublicInbox::XapClient::start_helper('-j0'); - $test_xhc->($xhc); + my $csrch = PublicInbox::CodeSearch->new("$tmp/ext"); + $test_xhc->($xhc, $csrch); skip 'PI_NO_CXX set', 1 if $ENV{PI_NO_CXX}; $xhc->{impl} =~ /Cxx/ or skip 'C++ compiler or xapian development libs missing', 1; skip 'TEST_XH_CXX_ONLY set', 1 if $ENV{TEST_XH_CXX_ONLY}; local $ENV{PI_NO_CXX} = 1; # force XS or SWIG binding test $xhc = PublicInbox::XapClient::start_helper('-j0'); - $test_xhc->($xhc); + $test_xhc->($xhc, $csrch); } if ('--update') { diff --git a/t/xap_helper.t b/t/xap_helper.t index 0f474608..70c634ac 100644 --- a/t/xap_helper.t +++ b/t/xap_helper.t @@ -204,7 +204,7 @@ for my $n (@NO_CXX) { $err = do { local $/; <$err_r> }; is $err, "mset.size=6 nr_out=5\n", "got expected status ($xhc->{impl})"; - $r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args, + $r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args, 'dfn:lib/PublicInbox/Search.pm'); chomp((my $hdr, @res) = readline($r)); is $hdr, 'mset.size=1', "got expected header via mset ($xhc->{impl}"; @@ -212,15 +212,14 @@ for my $n (@NO_CXX) { @res = split /\0/, $res[0]; { my $doc = $v2->search->xdb->get_document($res[0]); + ok $doc, 'valid document retrieved'; my @q = PublicInbox::Search::xap_terms('Q', $doc); is_deeply \@q, [ $mid ], 'docid usable'; } ok $res[1] > 0 && $res[1] <= 100, 'pct > 0 && <= 100'; - is $res[2], 'XDFID'.$dfid, 'XDFID result matches'; - is $res[3], 'Q'.$mid, 'Q (msgid) mset result matches'; - is scalar(@res), 4, 'only 4 columns in result'; + is scalar(@res), 2, 'only 2 columns in result'; - $r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args, + $r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args, 'dt:19700101'.'000000..'); chomp(($hdr, @res) = readline($r)); is $hdr, 'mset.size=6', @@ -231,17 +230,7 @@ for my $n (@NO_CXX) { my $doc = 
$v2->search->xdb->get_document($docid); ok $pct > 0 && $pct <= 100, "pct > 0 && <= 100 #$docid ($xhc->{impl})"; - my %terms; - for (@rest) { - s/\A([A-Z]+)// or xbail 'no prefix=', \@rest; - push @{$terms{$1}}, $_; - } - while (my ($pfx, $vals) = each %terms) { - @$vals = sort @$vals; - my @q = PublicInbox::Search::xap_terms($pfx, $doc); - is_deeply $vals, \@q, - "#$docid $pfx as expected ($xhc->{impl})"; - } + is scalar(@rest), 0, 'no extra rows returned'; } my $nr; for my $i (7, 8, 39, 40) { -- cgit v1.2.3-24-ge0c7