From 1bc79688ecf6bc79951c2c6ad6b1dbff1586af3f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 27 May 2019 18:45:45 +0000 Subject: v2: fix reindex skipping NNTP article numbers `public-inbox-index --reindex' could cause NNTP article number gaps to form when it also has to deal with new, never-before-seen commits in mirrors running off `git fetch'. Fix this by running two distinct invocations of ->index_sync; once to only reindex old commits, and a second time to index new commits. This does not appear to be a problem on v1 at the moment, but I'll need more time to analyze this. --- lib/PublicInbox/V2Writable.pm | 25 ++++++++++++++++++++++++- t/indexlevels-mirror.t | 25 +++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index cd08acd7..331c4f4f 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -850,11 +850,19 @@ sub index_prepare { my $pr = $opts->{-progress}; my $regen_max = 0; my $head = $self->{-inbox}->{ref_head} || 'refs/heads/master'; + + # reindex stops at the current heads and we later rerun index_sync + # without {reindex} + my $reindex_heads = last_commits($self, $epoch_max) if $opts->{reindex}; + for (my $i = $epoch_max; $i >= 0; $i--) { die 'BUG: already indexing!' if $self->{reindex_pipe}; my $git_dir = git_dir_n($self, $i); -d $git_dir or next; # missing parts are fine my $git = PublicInbox::Git->new($git_dir); + if ($reindex_heads) { + $head = $reindex_heads->[$i] or next; + } chomp(my $tip = $git->qx(qw(rev-parse -q --verify), $head)); next if $?; # new repo @@ -959,7 +967,14 @@ sub index_sync { my $high = $self->{mm}->num_highwater(); my $regen = $self->index_prepare($opts, $epoch_max, $ranges); - $$regen += $high if $high; + if ($opts->{reindex}) { + # reindex should NOT see new commits anymore, if we do, + # it's a problem and we need to notice it via die() + $$regen = -1; + } else { + $$regen += $high; + } + my $D = {}; # "$mid\0$cid" => $oid my @cmd = qw(log --raw -r --pretty=tformat:%H --no-notes --no-color --no-abbrev --no-renames); @@ -1001,6 +1016,14 @@ sub index_sync { $git->cleanup; } $self->done; + + # reindex does not pick up new changes, so we rerun w/o it: + if ($opts->{reindex}) { + my %again = %$opts; + $mm_tmp = undef; + delete @again{qw(reindex -skip_lock)}; + index_sync($self, \%again); + } } 1; diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index ce138fee..12511368 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -105,9 +105,17 @@ sub import_index_incremental { is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in master'); + my @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; + is_deeply(\@rw_nums, [1, 2], 'master has expected NNTP articles'); + + my @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)}; + is_deeply(\@ro_nums, [1, 2], 'mirror has expected NNTP articles'); + # remove message from master ok($im->remove($mime), '2nd message removed'); $im->done; + @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; + is_deeply(\@rw_nums, [1], 'unindex NNTP article'.$v.$level); if ($level ne 'basic') { is(system(@xcpdb, $mirror), 0, "v$v xcpdb OK"); @@ -132,6 +140,23 @@ sub import_index_incremental { ($nr, $msgs) = $ro_mirror->search->reopen->query('m:m@2'); is($nr, 0, "v$v m\@2 gone from Xapian in mirror on $level"); } + + # add another message to master and have the mirror + # sync and reindex it + my @expect = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; + foreach my $i (3..5) { + $mime->header_set('Message-ID', ""); + ok($im->add($mime), "#$i message added"); + push @expect, $i; + } + $im->done; + is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); + is(system($index, '--reindex', $mirror), 0, + "v$v index --reindex mirror OK"); + @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)}; + @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; + is_deeply(\@rw_nums, \@expect, "v$v master has expected NNTP articles"); + is_deeply(\@ro_nums, \@expect, "v$v mirror matches master articles"); } # we can probably cull some other tests and put full/medium tests, here -- cgit v1.2.3-24-ge0c7