about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2020-12-04 12:09:29 +0000
committerEric Wong <e@80x24.org>2020-12-26 19:45:11 +0000
commitb63c27f36a44d8deb8fd775b2322ec11a6c4eabf (patch)
tree066dc9f0be9837e6a0fbde99dce94859c22238fa
parentc39ed01a3a4c6c4634642eb875a16538aceacfc3 (diff)
downloadpublic-inbox-b63c27f36a44d8deb8fd775b2322ec11a6c4eabf.tar.gz
We must use the result of link_refs() since it can trigger
merge_threads() and invalidate $old_tid.  In case
merge_threads() isn't triggered, link_refs() will return
$old_tid anyways.

When rethreading and allocating new {tid}, we also must update
the row where the now-expired {tid} came from to ensure only the
new {tid} is seen when reindexing subsequent messages in
history.  Otherwise, every subsequently reindexed+rethreaded
message could end up getting a new {tid}.

Reported-by: Kyle Meyer <kyle@kyleam.com>
Link: https://public-inbox.org/meta/87360nlc44.fsf@kyleam.com/
(cherry picked from commit 9356ec0cc5afc95a8fd398ddf898942ef0acdb74)
-rw-r--r--MANIFEST1
-rw-r--r--lib/PublicInbox/OverIdx.pm12
-rw-r--r--t/thread-index-gap.t57
3 files changed, 67 insertions, 3 deletions
diff --git a/MANIFEST b/MANIFEST
index f3620de4..6c1cdecc 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -354,6 +354,7 @@ t/solver_git.t
 t/spamcheck_spamc.t
 t/spawn.t
 t/thread-cycle.t
+t/thread-index-gap.t
 t/time.t
 t/uri_imap.t
 t/utf8.eml
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index db4b7738..840e2c2a 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -170,8 +170,14 @@ sub _resolve_mid_to_tid {
                 $$tid = $cur_tid;
         } else { # rethreading, queue up dead ghosts
                 $$tid = next_tid($self);
-                my $num = $smsg->{num};
-                push(@{$self->{-ghosts_to_delete}}, $num) if $num < 0;
+                my $n = $smsg->{num};
+                if ($n > 0) {
+                        $self->{dbh}->prepare_cached(<<'')->execute($$tid, $n);
+UPDATE over SET tid = ? WHERE num = ?
+
+                } elsif ($n < 0) {
+                        push(@{$self->{-ghosts_to_delete}}, $n);
+                }
         }
         1;
 }
@@ -294,7 +300,7 @@ sub _add_over {
                 }
         } elsif ($n < 0) { # ghost
                 $$old_tid //= $cur_valid ? $cur_tid : next_tid($self);
-                link_refs($self, $refs, $$old_tid);
+                $$old_tid = link_refs($self, $refs, $$old_tid);
                 delete_by_num($self, $n);
                 $$v++;
         }
diff --git a/t/thread-index-gap.t b/t/thread-index-gap.t
new file mode 100644
index 00000000..49f254e9
--- /dev/null
+++ b/t/thread-index-gap.t
@@ -0,0 +1,57 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Eml;
+use PublicInbox::InboxWritable;
+use PublicInbox::Config;
+use List::Util qw(shuffle);
+require_mods(qw(DBD::SQLite));
+require_git(2.6);
+
+chomp(my @msgs = split(/\n\n/, <<'EOF')); # "git log" order
+Subject: [bug#45000] [PATCH 1/9]
+References: <20201202045335.31096-1-j@example.com>
+Message-Id: <20201202045540.31248-1-j@example.com>
+
+Subject: [bug#45000] [PATCH 0/9]
+Message-Id: <20201202045335.31096-1-j@example.com>
+
+Subject: [bug#45000] [PATCH 0/9]
+References: <20201202045335.31096-1-j@example.com>
+Message-ID: <86sg8o1mou.fsf@example.com>
+
+Subject: [bug#45000] [PATCH 8/9]
+Message-Id: <20201202045540.31248-8-j@example.com>
+References: <20201202045540.31248-1-j@example.com>
+
+EOF
+
+my ($home, $for_destroy) = tmpdir();
+local $ENV{HOME} = $home;
+for my $msgs (['orig', reverse @msgs], ['shuffle', shuffle(@msgs)]) {
+        my $desc = shift @$msgs;
+        my $n = "index-cap-$desc";
+        run_script([qw(-init -L basic -V2), $n, "$home/$n",
+                "http://example.com/$n", "$n\@example.com"]) or
+                BAIL_OUT 'init';
+        my $ibx = PublicInbox::Config->new->lookup_name($n);
+        my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
+        for my $m (@$msgs) {
+                $im->add(PublicInbox::Eml->new("$m\nFrom: x\@example.com\n\n"));
+        }
+        $im->done;
+        my $over = $ibx->over;
+        my @tid = $over->dbh->selectall_array('SELECT DISTINCT(tid) FROM over');
+        is(scalar(@tid), 1, "only one thread initially ($desc)");
+        $over->dbh_close;
+        run_script([qw(-index --reindex --rethread), $ibx->{inboxdir}]) or
+                BAIL_OUT 'rethread';
+        @tid = $over->dbh->selectall_array('SELECT DISTINCT(tid) FROM over');
+        is(scalar(@tid), 1, "only one thread after rethread ($desc)");
+}
+
+done_testing;