about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2019-05-27 18:45:45 +0000
committerEric Wong <e@80x24.org>2019-05-27 18:48:06 +0000
commit1bc79688ecf6bc79951c2c6ad6b1dbff1586af3f (patch)
tree6dcf9283c2c8cb4ddd9d65bf3a1acab7fff10e02
parentf7636f7f2343d6ac134b35b447e57fa2a38feba1 (diff)
downloadpublic-inbox-1bc79688ecf6bc79951c2c6ad6b1dbff1586af3f.tar.gz
`public-inbox-index --reindex' could cause NNTP article number
gaps to form when it also has to deal with new,
never-before-seen commits in mirrors running off `git fetch'.

Fix this by running two distinct invocations of ->index_sync;
once to only reindex old commits, and a second time to index
new commits.

This does not appear to be a problem on v1 at the moment,
but I'll need more time to analyze this.
-rw-r--r--lib/PublicInbox/V2Writable.pm25
-rw-r--r--t/indexlevels-mirror.t25
2 files changed, 49 insertions, 1 deletions
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index cd08acd7..331c4f4f 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -850,11 +850,19 @@ sub index_prepare {
         my $pr = $opts->{-progress};
         my $regen_max = 0;
         my $head = $self->{-inbox}->{ref_head} || 'refs/heads/master';
+
+        # reindex stops at the current heads and we later rerun index_sync
+        # without {reindex}
+        my $reindex_heads = last_commits($self, $epoch_max) if $opts->{reindex};
+
         for (my $i = $epoch_max; $i >= 0; $i--) {
                 die 'BUG: already indexing!' if $self->{reindex_pipe};
                 my $git_dir = git_dir_n($self, $i);
                 -d $git_dir or next; # missing parts are fine
                 my $git = PublicInbox::Git->new($git_dir);
+                if ($reindex_heads) {
+                        $head = $reindex_heads->[$i] or next;
+                }
                 chomp(my $tip = $git->qx(qw(rev-parse -q --verify), $head));
 
                 next if $?; # new repo
@@ -959,7 +967,14 @@ sub index_sync {
 
         my $high = $self->{mm}->num_highwater();
         my $regen = $self->index_prepare($opts, $epoch_max, $ranges);
-        $$regen += $high if $high;
+        if ($opts->{reindex}) {
+                # reindex should NOT see new commits anymore, if we do,
+                # it's a problem and we need to notice it via die()
+                $$regen = -1;
+        } else {
+                $$regen += $high;
+        }
+
         my $D = {}; # "$mid\0$cid" => $oid
         my @cmd = qw(log --raw -r --pretty=tformat:%H
                         --no-notes --no-color --no-abbrev --no-renames);
@@ -1001,6 +1016,14 @@ sub index_sync {
                 $git->cleanup;
         }
         $self->done;
+
+        # reindex does not pick up new changes, so we rerun w/o it:
+        if ($opts->{reindex}) {
+                my %again = %$opts;
+                $mm_tmp = undef;
+                delete @again{qw(reindex -skip_lock)};
+                index_sync($self, \%again);
+        }
 }
 
 1;
diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t
index ce138fee..12511368 100644
--- a/t/indexlevels-mirror.t
+++ b/t/indexlevels-mirror.t
@@ -105,9 +105,17 @@ sub import_index_incremental {
         is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs],
                 ['m@1','m@2'], 'got both messages in master');
 
+        my @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)};
+        is_deeply(\@rw_nums, [1, 2], 'master has expected NNTP articles');
+
+        my @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)};
+        is_deeply(\@ro_nums, [1, 2], 'mirror has expected NNTP articles');
+
         # remove message from master
         ok($im->remove($mime), '2nd message removed');
         $im->done;
+        @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)};
+        is_deeply(\@rw_nums, [1], 'unindex NNTP article'.$v.$level);
 
         if ($level ne 'basic') {
                 is(system(@xcpdb, $mirror), 0, "v$v xcpdb OK");
@@ -132,6 +140,23 @@ sub import_index_incremental {
                 ($nr, $msgs) = $ro_mirror->search->reopen->query('m:m@2');
                 is($nr, 0, "v$v m\@2 gone from Xapian in mirror on $level");
         }
+
+        # add another message to master and have the mirror
+        # sync and reindex it
+        my @expect = map { $_->{num} } @{$ibx->over->query_ts(0, 0)};
+        foreach my $i (3..5) {
+                $mime->header_set('Message-ID', "<m\@$i>");
+                ok($im->add($mime), "#$i message added");
+                push @expect, $i;
+        }
+        $im->done;
+        is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
+        is(system($index, '--reindex', $mirror), 0,
+                "v$v index --reindex mirror OK");
+        @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)};
+        @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)};
+        is_deeply(\@rw_nums, \@expect, "v$v master has expected NNTP articles");
+        is_deeply(\@ro_nums, \@expect, "v$v mirror matches master articles");
 }
 
 # we can probably cull some other tests and put full/medium tests, here