From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] xcpdb: preserve indexlevel for extindex
Date: Wed, 26 Apr 2023 00:05:00 +0000 [thread overview]
Message-ID: <20230426000500.2647052-1-e@80x24.org> (raw)
This probably fixes indexlevel preservation for some v2 inboxes on
some systems, too, since (apparently) we need to sort shards
numerically to get Xapian metadata working properly on a
combined (multi-shard) Xapian DB.
---
lib/PublicInbox/Xapcmd.pm | 9 ++++-----
t/extsearch.t | 31 +++++++++++++++++++++++++++++--
2 files changed, 33 insertions(+), 7 deletions(-)
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index c87baa7b..3a4c5622 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -219,7 +219,7 @@ sub prepare_run {
my @old_shards;
while (defined(my $dn = readdir($dh))) {
if ($dn =~ /\A[0-9]+\z/) {
- push @old_shards, $dn;
+ push(@old_shards, $dn + 0);
} elsif ($dn eq '.' || $dn eq '..') {
} elsif ($dn =~ /\Aover\.sqlite3/) {
} elsif ($dn eq 'misc' && $misc_ok) {
@@ -228,7 +228,7 @@ sub prepare_run {
}
}
die "No Xapian shards found in $old\n" unless @old_shards;
-
+ @old_shards = sort { $a <=> $b } @old_shards;
my ($src, $max_shard);
if (!defined($reshard) || $reshard == scalar(@old_shards)) {
# 1:1 copy
@@ -464,11 +464,10 @@ sub cpdb ($$) { # cb_spawn callback
$dst->set_metadata('last_commit', $lc) if $lc;
# only the first xapian shard (0) gets 'indexlevel'
- if ($new =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\b!) {
+ if ($new =~ m!/(?:xapian[0-9]+|(?:ei|xap)[0-9]+/0)\b!) {
my $l = $src->get_metadata('indexlevel');
- if ($l eq 'medium') {
+ $l eq 'medium' and
$dst->set_metadata('indexlevel', $l);
- }
}
if ($pr_data) {
my $tot = $src->get_doccount;
diff --git a/t/extsearch.t b/t/extsearch.t
index 03bcad95..8cbd26f0 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -1,8 +1,7 @@
#!perl -w
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use Test::More;
+use v5.12;
use PublicInbox::TestCommon;
use PublicInbox::Config;
use PublicInbox::InboxWritable;
@@ -554,4 +553,32 @@ EOM
is_deeply($x, $o, 'xref3 and over docids match');
}
+{
+ my $d = "$home/eidx-med";
+ ok(run_script([qw(-extindex --dangerous --all -L medium -j3), $d]),
+ 'extindex medium init');
+ my $es = PublicInbox::ExtSearch->new($d);
+ is($es->xdb->get_metadata('indexlevel'), 'medium',
+ 'es indexlevel before');
+ my @xdb = $es->xdb_shards_flat;
+ is($xdb[0]->get_metadata('indexlevel'), 'medium',
+ '0 indexlevel before');
+ shift @xdb;
+ for (@xdb) {
+ ok(!$_->get_metadata('indexlevel'), 'no indexlevel in >0 shard')
+ }
+ is($es->xdb->get_metadata('indexlevel'), 'medium', 'indexlevel before');
+ ok(run_script([qw(-xcpdb -R5), $d]), 'xcpdb R5');
+ $es = PublicInbox::ExtSearch->new($d);
+ is($es->xdb->get_metadata('indexlevel'), 'medium',
+ '0 indexlevel after');
+ @xdb = $es->xdb_shards_flat;
+ is(scalar(@xdb), 5, 'got 5 shards');
+ is($xdb[0]->get_metadata('indexlevel'), 'medium', '0 indexlevel after');
+ shift @xdb;
+ for (@xdb) {
+ ok(!$_->get_metadata('indexlevel'), 'no indexlevel in >0 shard')
+ }
+}
+
done_testing;
reply other threads:[~2023-04-26 0:05 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230426000500.2647052-1-e@80x24.org \
--to=e@80x24.org \
--cc=spew@80x24.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).