about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r-- Documentation/public-inbox-config.pod | 8
-rw-r--r-- lib/PublicInbox/Config.pm | 2
-rw-r--r-- lib/PublicInbox/ExtSearchIdx.pm | 43
-rw-r--r-- t/extsearch.t | 32
4 files changed, 69 insertions, 16 deletions
diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod
index 05d9ca62..5b86ef6c 100644
--- a/Documentation/public-inbox-config.pod
+++ b/Documentation/public-inbox-config.pod
@@ -124,6 +124,14 @@ allow for searching for phrases using quoted text.
 
 Default: C<full>
 
+=item publicinbox.<name>.boost
+
+Control indexing order for L<public-inbox-extindex(1)>; inboxes with
+a higher value are indexed first, ties broken by config file order.
+This only affects indexing, not messages which are already indexed.
+
+Default: C<0>
+
 =item publicinbox.<name>.indexSequentialShard
 
 See L<public-inbox-index(1)/publicInbox.indexSequentialShard>
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 8e46328d..7aa1f6c8 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -429,7 +429,7 @@ sub _fill_ibx {
                 $ibx->{$k} = $v if defined $v;
         }
         for my $k (qw(filter inboxdir newsgroup replyto httpbackendmax feedmax
-                        indexlevel indexsequentialshard)) {
+                        indexlevel indexsequentialshard boost)) {
                 my $v = get_1($self, $pfx, $k) // next;
                 $ibx->{$k} = $v;
         }
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 0e27bba6..357312b8 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -44,7 +44,7 @@ sub new {
                 topdir => $dir,
                 creat => $opt->{creat},
                 ibx_map => {}, # (newsgroup//inboxdir) => $ibx
-                ibx_list => [],
+                ibx_cfg => [], # by config section order
                 indexlevel => $l,
                 transact_bytes => 0,
                 total_bytes => 0,
@@ -62,7 +62,8 @@ sub new {
 sub attach_inbox {
         my ($self, $ibx) = @_;
         $self->{ibx_map}->{$ibx->eidx_key} //= do {
-                push @{$self->{ibx_list}}, $ibx;
+                delete $self->{-ibx_ary}; # invalidate cache
+                push @{$self->{ibx_cfg}}, $ibx;
                 $ibx;
         }
 }
@@ -388,7 +389,7 @@ sub _ibx_for ($$$) {
         my ($self, $sync, $smsg) = @_;
         my $ibx_id = delete($smsg->{ibx_id}) // die '{ibx_id} unset';
         my $pos = $sync->{id2pos}->{$ibx_id} // die "$ibx_id no pos";
-        $self->{ibx_list}->[$pos] // die "BUG: ibx for $smsg->{blob} not mapped"
+        $self->{-ibx_ary}->[$pos] // die "BUG: ibx for $smsg->{blob} not mapped"
 }
 
 sub _fd_constrained ($) {
@@ -402,7 +403,7 @@ sub _fd_constrained ($) {
                         chomp($soft = `sh -c 'ulimit -n'`);
                 }
                 if (defined($soft)) {
-                        my $want = scalar(@{$self->{ibx_list}}) + 64; # estimate
+                        my $want = scalar(@{$self->{-ibx_ary}}) + 64; # estimate
                         my $ret = $want > $soft;
                         if ($ret) {
                                 warn <<EOF;
@@ -524,10 +525,10 @@ BUG? #$docid $smsg->{blob} is not referenced by inboxes during reindex
                 return;
         }
 
-        # we sort {xr3r} in the reverse order of {ibx_list} so we can
+        # we sort {xr3r} in the reverse order of ibx_sorted so we can
         # hit the common case in _reindex_finalize without rereading
         # from git (or holding multiple messages in memory).
-        my $id2pos = $sync->{id2pos}; # index in {ibx_list}
+        my $id2pos = $sync->{id2pos}; # index in ibx_sorted
         @$xr3 = sort {
                 $id2pos->{$b->[0]} <=> $id2pos->{$a->[0]}
                                 ||
@@ -621,6 +622,17 @@ EOF
         undef;
 }
 
+sub ibx_sorted ($) {
+        my ($self) = @_;
+        $self->{-ibx_ary} //= do {
+                # highest boost first, stable for config-ordering tiebreaker
+                use sort 'stable';
+                [ sort {
+                        ($b->{boost} // 0) <=> ($a->{boost} // 0)
+                  } @{$self->{ibx_cfg}} ];
+        }
+}
+
 sub eidxq_process ($$) { # for reindexing
         my ($self, $sync) = @_;
 
@@ -638,7 +650,7 @@ sub eidxq_process ($$) { # for reindexing
         $sync->{id2pos} //= do {
                 my %id2pos;
                 my $pos = 0;
-                $id2pos{$_->{-ibx_id}} = $pos++ for @{$self->{ibx_list}};
+                $id2pos{$_->{-ibx_id}} = $pos++ for (@{ibx_sorted($self)});
                 \%id2pos;
         };
         my ($del, $iter);
@@ -829,7 +841,7 @@ sub eidx_reindex {
                 warn "E: aborting --reindex\n";
                 return;
         }
-        for my $ibx (@{$self->{ibx_list}}) {
+        for my $ibx (@{ibx_sorted($self)}) {
                 _reindex_inbox($self, $sync, $ibx);
                 last if $sync->{quit};
         }
@@ -959,7 +971,7 @@ sub eidx_sync { # main entry point
         local $SIG{QUIT} = $quit;
         local $SIG{INT} = $quit;
         local $SIG{TERM} = $quit;
-        for my $ibx (@{$self->{ibx_list}}) {
+        for my $ibx (@{ibx_sorted($self)}) {
                 $ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key);
         }
         if (delete($opt->{dedupe})) {
@@ -973,7 +985,7 @@ sub eidx_sync { # main entry point
 
         # don't use $_ here, it'll get clobbered by reindex_checkpoint
         if ($opt->{scan} // 1) {
-                for my $ibx (@{$self->{ibx_list}}) {
+                for my $ibx (@{ibx_sorted($self)}) {
                         last if $sync->{quit};
                         sync_inbox($self, $sync, $ibx);
                 }
@@ -1115,7 +1127,7 @@ sub idx_init { # similar to V2Writable
                 }
                 undef $dh;
         }
-        for my $ibx (@{$self->{ibx_list}}) {
+        for my $ibx (@{ibx_sorted($self)}) {
                 # create symlinks for multi-pack-index
                 $git_midx += symlink_packs($ibx, $pd);
                 # add new lines to our alternates file
@@ -1180,7 +1192,8 @@ sub eidx_reload { # -extindex --watch SIGHUP handler
                 my $pr = $self->{-watch_sync}->{-opt}->{-progress};
                 $pr->('reloading ...') if $pr;
                 delete $self->{-resync_queue};
-                @{$self->{ibx_list}} = ();
+                delete $self->{-ibx_ary};
+                $self->{ibx_cfg} = [];
                 %{$self->{ibx_map}} = ();
                 delete $self->{-watch_sync}->{id2pos};
                 my $cfg = PublicInbox::Config->new;
@@ -1194,7 +1207,7 @@ sub eidx_reload { # -extindex --watch SIGHUP handler
 
 sub eidx_resync_start ($) { # -extindex --watch SIGUSR1 handler
         my ($self) = @_;
-        $self->{-resync_queue} //= [ @{$self->{ibx_list}} ];
+        $self->{-resync_queue} //= [ @{ibx_sorted($self)} ];
         PublicInbox::DS::requeue($self); # trigger our ->event_step
 }
 
@@ -1225,9 +1238,9 @@ sub eidx_watch { # public-inbox-extindex --watch main loop
         require PublicInbox::Sigfd;
         my $idler = PublicInbox::InboxIdle->new($self->{cfg});
         if (!$self->{cfg}) {
-                $idler->watch_inbox($_) for @{$self->{ibx_list}};
+                $idler->watch_inbox($_) for (@{ibx_sorted($self)});
         }
-        $_->subscribe_unlock(__PACKAGE__, $self) for @{$self->{ibx_list}};
+        $_->subscribe_unlock(__PACKAGE__, $self) for (@{ibx_sorted($self)});
         my $pr = $opt->{-progress};
         $pr->("performing initial scan ...\n") if $pr;
         my $sync = eidx_sync($self, $opt); # initial sync
diff --git a/t/extsearch.t b/t/extsearch.t
index 5f0cd866..46a6f2ec 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -60,6 +60,38 @@ ok(run_script([qw(-extindex --all), "$home/extindex"]), 'extindex init');
         ok($es->has_threadid, '->has_threadid');
 }
 
+if ('with boost') {
+        xsys([qw(git config publicinbox.v1test.boost), 10],
+                { GIT_CONFIG => $cfg_path });
+        ok(run_script([qw(-extindex --all), "$home/extindex-b"]),
+                'extindex init with boost');
+        my $es = PublicInbox::ExtSearch->new("$home/extindex-b");
+        my $smsg = $es->over->get_art(1);
+        ok($smsg, 'got first article');
+        my $xref3 = $es->over->get_xref3($smsg->{num});
+        my @v1 = grep(/\Av1/, @$xref3);
+        my @v2 = grep(/\Av2/, @$xref3);
+        like($v1[0], qr/\Av1\.example.*?\b\Q$smsg->{blob}\E\b/,
+                'smsg->{blob} respected boost');
+        is(scalar(@$xref3), 2, 'only two entries');
+        undef $es;
+
+        xsys([qw(git config publicinbox.v2test.boost), 20],
+                { GIT_CONFIG => $cfg_path });
+        ok(run_script([qw(-extindex --all --reindex), "$home/extindex-b"]),
+                'extindex --reindex with altered boost');
+
+        $es = PublicInbox::ExtSearch->new("$home/extindex-b");
+        $smsg = $es->over->get_art(1);
+        like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/,
+                        'smsg->{blob} respects boost after reindex');
+
+        xsys([qw(git config --unset publicinbox.v1test.boost)],
+                { GIT_CONFIG => $cfg_path });
+        xsys([qw(git config --unset publicinbox.v2test.boost)],
+                { GIT_CONFIG => $cfg_path });
+}
+
 { # TODO: -extindex should write this to config
         open $fh, '>>', $cfg_path or BAIL_OUT $!;
         print $fh <<EOF or BAIL_OUT $!;