about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/PublicInbox/Admin.pm5
-rw-r--r--lib/PublicInbox/Compat.pm4
-rw-r--r--lib/PublicInbox/Config.pm6
-rw-r--r--lib/PublicInbox/DS.pm407
-rw-r--r--lib/PublicInbox/DSdeflate.pm11
-rw-r--r--lib/PublicInbox/ExtSearchIdx.pm8
-rw-r--r--lib/PublicInbox/Gcf2.pm2
-rw-r--r--lib/PublicInbox/Git.pm27
-rw-r--r--lib/PublicInbox/GitCredential.pm3
-rw-r--r--lib/PublicInbox/GitHTTPBackend.pm4
-rw-r--r--lib/PublicInbox/GzipFilter.pm6
-rw-r--r--lib/PublicInbox/HTTP.pm33
-rw-r--r--lib/PublicInbox/IMAP.pm14
-rw-r--r--lib/PublicInbox/Import.pm3
-rw-r--r--lib/PublicInbox/LEI.pm9
-rw-r--r--lib/PublicInbox/LeiBlob.pm4
-rw-r--r--lib/PublicInbox/LeiConfig.pm3
-rw-r--r--lib/PublicInbox/LeiInput.pm4
-rw-r--r--lib/PublicInbox/LeiMailDiff.pm3
-rw-r--r--lib/PublicInbox/LeiSavedSearch.pm6
-rw-r--r--lib/PublicInbox/LeiViewText.pm3
-rw-r--r--lib/PublicInbox/MailDiff.pm3
-rw-r--r--lib/PublicInbox/MultiGit.pm6
-rw-r--r--lib/PublicInbox/Search.pm18
-rw-r--r--lib/PublicInbox/SearchThread.pm9
-rw-r--r--lib/PublicInbox/SearchView.pm17
-rw-r--r--lib/PublicInbox/SolverGit.pm13
-rw-r--r--lib/PublicInbox/Syscall.pm66
-rw-r--r--lib/PublicInbox/TestCommon.pm12
-rw-r--r--lib/PublicInbox/View.pm109
-rw-r--r--lib/PublicInbox/ViewDiff.pm10
-rw-r--r--lib/PublicInbox/ViewVCS.pm20
-rw-r--r--lib/PublicInbox/WwwCoderepo.pm9
-rw-r--r--lib/PublicInbox/WwwText.pm2
-rw-r--r--lib/PublicInbox/XapHelper.pm48
-rw-r--r--lib/PublicInbox/XapHelperCxx.pm1
-rw-r--r--lib/PublicInbox/khashl.h502
-rw-r--r--lib/PublicInbox/xap_helper.h273
-rw-r--r--lib/PublicInbox/xh_cidx.h79
39 files changed, 1247 insertions, 515 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index a1b1fc07..bb5d3653 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -11,6 +11,7 @@ use PublicInbox::Config;
 use PublicInbox::Inbox;
 use PublicInbox::Spawn qw(run_qx);
 use PublicInbox::Eml;
+use PublicInbox::Git qw(git_exe);
 *rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed;
 
 sub setup_signals {
@@ -77,7 +78,7 @@ sub resolve_git_dir {
         my $env;
         defined($pwd) && substr($cd // '/', 0, 1) ne '/' and
                 $env->{PWD} = "$pwd/$cd";
-        my $cmd = [ qw(git rev-parse --git-dir) ];
+        my $cmd = [ git_exe, qw(rev-parse --git-dir) ];
         my $dir = run_qx($cmd, $env, { -C => $cd });
         die "error in @$cmd (cwd:${\($cd // '.')}): $?\n" if $?;
         chomp $dir;
@@ -317,7 +318,7 @@ sub progress_prepare ($;$) {
                 $opt->{1} = $null; # suitable for spawn() redirect
         } else {
                 $opt->{verbose} ||= 1;
-                $dst //= *STDERR{GLOB};
+                $dst //= \*STDERR;
                 $opt->{-progress} = sub { print $dst '# ', @_ };
         }
 }
diff --git a/lib/PublicInbox/Compat.pm b/lib/PublicInbox/Compat.pm
index 78cba90e..8ed2d7a1 100644
--- a/lib/PublicInbox/Compat.pm
+++ b/lib/PublicInbox/Compat.pm
@@ -8,7 +8,7 @@ use v5.12;
 use parent qw(Exporter);
 require List::Util;
 
-our @EXPORT_OK = qw(uniqstr);
+our @EXPORT_OK = qw(uniqstr sum0);
 
 # uniqstr is in List::Util 1.45+, which means Perl 5.26+;
 # so maybe 2030 for us since we need to support enterprise distros.
@@ -21,4 +21,6 @@ no warnings 'once';
         grep { !$seen{$_}++ } @_;
 };
 
+*sum0 = List::Util->can('sum0') // sub (@) { List::Util::sum(@_) // 0 };
+
 1;
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 49659a2e..998fc25e 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -517,7 +517,9 @@ sub _fill_ibx {
                         delete $ibx->{newsgroup};
                         warn "newsgroup name invalid: `$ngname'\n";
                 } else {
-                        my $lc = $ibx->{newsgroup} = lc $ngname;
+                        %dedupe = (lc($ngname) => undef);
+                        my ($lc) = keys %dedupe;
+                        $ibx->{newsgroup} = $lc;
                         warn <<EOM if $lc ne $ngname;
 W: newsgroup=`$ngname' lowercased to `$lc'
 EOM
@@ -630,7 +632,7 @@ sub urlmatch {
                 } elsif (($? >> 8) != 1) {
                         $urlmatch_broken = 1;
                 } elsif ($try_git) { # n.b. this takes cwd into account
-                        $val = run_qx([qw(git config), @bool,
+                        $val = run_qx([$cmd->[0], 'config', @bool,
                                         qw(-z --get-urlmatch), $key, $url]);
                         undef $val if $?;
                 }
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index a6fec954..f807c626 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -18,14 +18,14 @@
 # sock: underlying socket
 # rbuf: scalarref, usually undef
 # wbuf: arrayref of coderefs or tmpio (autovivified))
-#        (tmpio = [ GLOB, offset, [ length ] ])
+#        (tmpio = [ GLOB, offset, [ length ] ])
 package PublicInbox::DS;
 use strict;
 use v5.10.1;
 use parent qw(Exporter);
 use bytes qw(length substr); # FIXME(?): needed for PublicInbox::NNTP
 use POSIX qw(WNOHANG sigprocmask SIG_SETMASK SIG_UNBLOCK);
-use Fcntl qw(SEEK_SET :DEFAULT O_APPEND);
+use Fcntl qw(SEEK_SET :DEFAULT);
 use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
 use Scalar::Util qw(blessed);
 use PublicInbox::Syscall qw(%SIGNUM
@@ -35,23 +35,24 @@ use PublicInbox::Select;
 use PublicInbox::OnDestroy;
 use Errno qw(EAGAIN EINVAL ECHILD);
 use Carp qw(carp croak);
+use List::Util qw(sum);
 our @EXPORT_OK = qw(now msg_more awaitpid add_timer add_uniq_timer);
+my $sendmsg_more = PublicInbox::Syscall->can('sendmsg_more');
+my $writev = PublicInbox::Syscall->can('writev');
 
 my $nextq; # queue for next_tick
 my $reap_armed;
 my @active; # FDs (or objs) returned by epoll/kqueue
 our (%AWAIT_PIDS, # pid => [ $callback, @args ]
         $cur_runq, # only set inside next_tick
-     @FD_MAP, # fd (num) -> PublicInbox::DS object
-     $Poller, # global Select, Epoll, DSPoll, or DSKQXS ref
-
-     @post_loop_do,              # subref + args to call at the end of each loop
-
-     $loop_timeout,               # timeout of event loop in milliseconds
-     @Timers,                    # timers
-     %UniqTimer,
-     $in_loop,
-     );
+        @FD_MAP, # fd (num) -> PublicInbox::DS object
+        $Poller, # global Select, Epoll, DSPoll, or DSKQXS ref
+        @post_loop_do,        # subref + args to call at the end of each loop
+        $loop_timeout,        # timeout of event loop in milliseconds
+        @Timers,
+        %UniqTimer,
+        $in_loop,
+);
 
 Reset();
 
@@ -318,23 +319,22 @@ This is normally (always?) called from your subclass via:
 
 =cut
 sub new {
-    my ($self, $sock, $ev) = @_;
-    $self->{sock} = $sock;
-    my $fd = fileno($sock);
-
-    $Poller //= _InitPoller();
+        my ($self, $sock, $ev) = @_;
+        $self->{sock} = $sock;
+        my $fd = fileno($sock);
+        $Poller //= _InitPoller();
 retry:
-    if ($Poller->ep_add($sock, $ev)) {
-        if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) {
-            $ev &= ~EPOLLEXCLUSIVE;
-            goto retry;
-        }
-        die "EPOLL_CTL_ADD $self/$sock/$fd: $!";
-    }
-    defined($FD_MAP[$fd]) and
+        if ($Poller->ep_add($sock, $ev)) {
+                if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) {
+                        $ev &= ~EPOLLEXCLUSIVE;
+                        goto retry;
+                }
+                die "EPOLL_CTL_ADD $self/$sock/$fd: $!";
+        }
+        defined($FD_MAP[$fd]) and
                 croak("BUG: FD:$fd in use by $FD_MAP[$fd] (for $self/$sock)");
 
-    $FD_MAP[$fd] = $self;
+        $FD_MAP[$fd] = $self;
 }
 
 # for IMAP, NNTP, and POP3 which greet clients upon connect
@@ -374,75 +374,80 @@ sub close {
 
 # portable, non-thread-safe sendfile emulation (no pread, yet)
 sub send_tmpio ($$) {
-    my ($sock, $tmpio) = @_;
-
-    sysseek($tmpio->[0], $tmpio->[1], SEEK_SET) or return;
-    my $n = $tmpio->[2] // 65536;
-    $n = 65536 if $n > 65536;
-    defined(my $to_write = sysread($tmpio->[0], my $buf, $n)) or return;
-    my $written = 0;
-    while ($to_write > 0) {
-        if (defined(my $w = syswrite($sock, $buf, $to_write, $written))) {
-            $written += $w;
-            $to_write -= $w;
-        } else {
-            return if $written == 0;
-            last;
-        }
-    }
-    $tmpio->[1] += $written; # offset
-    $tmpio->[2] -= $written if defined($tmpio->[2]); # length
-    $written;
+        my ($sock, $tmpio) = @_;
+
+        sysseek($tmpio->[0], $tmpio->[1], SEEK_SET) or return;
+        my $n = $tmpio->[2] // 65536;
+        $n = 65536 if $n > 65536;
+        my $to_write = sysread($tmpio->[0], my $buf, $n) // return;
+        my $total = 0;
+        while ($to_write > 0) {
+                if (defined(my $w = syswrite($sock, $buf, $to_write, $total))) {
+                        $total += $w;
+                        $to_write -= $w;
+                } else {
+                        $total ? last : return;
+                }
+        }
+        $tmpio->[1] += $total; # offset
+        $tmpio->[2] -= $total if defined($tmpio->[2]); # length
+        $total;
 }
 
 sub epbit ($$) { # (sock, default)
         $_[0]->can('stop_SSL') ? PublicInbox::TLS::epollbit() : $_[1];
 }
 
+sub epwait ($$) {
+        my ($io, $ev) = @_;
+        $Poller->ep_mod($io, $ev) and croak("EPOLL_CTL_MOD($io): $!");
+}
+
 # returns 1 if done, 0 if incomplete
-sub flush_write ($) {
-    my ($self) = @_;
-    my $sock = $self->{sock} or return;
-    my $wbuf = $self->{wbuf} or return 1;
+sub flush_write {
+        my ($self) = @_;
+        my $sock = $self->{sock} or return;
+        my $wbuf = $self->{wbuf} or return 1;
 
 next_buf:
-    while (my $bref = $wbuf->[0]) {
-        if (ref($bref) ne 'CODE') {
-            while ($sock) {
-                my $w = send_tmpio($sock, $bref); # bref is tmpio
-                if (defined $w) {
-                    if ($w == 0) {
-                        shift @$wbuf;
-                        goto next_buf;
-                    }
-                } elsif ($! == EAGAIN && (my $ev = epbit($sock, EPOLLOUT))) {
-                    epwait($sock, $ev | EPOLLONESHOT);
-                    return 0;
-                } else {
-                    return $self->close;
-                }
-            }
-        } else { #(ref($bref) eq 'CODE') {
-            shift @$wbuf;
-            my $before = scalar(@$wbuf);
-            $bref->($self);
-
-            # bref may be enqueueing more CODE to call (see accept_tls_step)
-            return 0 if (scalar(@$wbuf) > $before);
-        }
-    } # while @$wbuf
-
-    delete $self->{wbuf};
-    1; # all done
+        while (my $bref = $wbuf->[0]) {
+                if (ref($bref) ne 'CODE') { # bref is tmpio
+                        while ($sock) {
+                                my $w = send_tmpio $sock, $bref;
+                                if (defined $w) {
+                                        if ($w == 0) {
+                                                shift @$wbuf;
+                                                goto next_buf;
+                                        }
+                                } elsif ($! == EAGAIN && (my $ev = epbit
+                                                        $sock, EPOLLOUT)) {
+                                        epwait $sock, $ev | EPOLLONESHOT;
+                                        return 0;
+                                } else {
+                                        return $self->close;
+                                }
+                        }
+                } else { #(ref($bref) eq 'CODE') {
+                        shift @$wbuf;
+                        my $before = scalar(@$wbuf);
+                        $bref->($self);
+                        # bref may be enqueueing more CODE to call
+                        # (see accept_tls_step)
+                        return 0 if (scalar(@$wbuf) > $before);
+                }
+        } # while @$wbuf
+
+        delete $self->{wbuf};
+        1; # all done
 }
 
 sub rbuf_idle ($$) {
-    my ($self, $rbuf) = @_;
-    if ($$rbuf eq '') { # who knows how long till we can read again
-        delete $self->{rbuf};
-    } else {
-        $self->{rbuf} = $rbuf;
-    }
+        my ($self, $rbuf) = @_;
+        if ($$rbuf eq '') { # who knows how long till we can read again
+                delete $self->{rbuf};
+        } else {
+                $self->{rbuf} = $rbuf;
+        }
 }
 
 # returns true if bytes are read, false otherwise
@@ -451,8 +456,8 @@ sub do_read ($$$;$) {
         my ($ev, $r, $s);
         $r = sysread($s = $self->{sock}, $$rbuf, $len, $off // 0) and return $r;
 
-        if (!defined($r) && $! == EAGAIN && ($ev = epbit($s, EPOLLIN))) {
-                epwait($s, $ev | EPOLLONESHOT);
+        if (!defined($r) && $! == EAGAIN && ($ev = epbit $s, EPOLLIN)) {
+                epwait $s, $ev | EPOLLONESHOT;
                 rbuf_idle($self, $rbuf);
         } else {
                 $self->close;
@@ -462,22 +467,23 @@ sub do_read ($$$;$) {
 
 # drop the socket if we hit unrecoverable errors on our system which
 # require BOFH attention: ENOSPC, EFBIG, EIO, EMFILE, ENFILE...
-sub drop {
+sub drop ($@) {
         my $self = shift;
         carp(@_);
         $self->close;
         undef;
 }
 
-sub tmpio ($$$) {
-        my ($self, $bref, $off) = @_;
-        my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or
-                return drop($self, "tmpfile $!");
+sub tmpio ($$$;@) {
+        my ($self, $bref, $off, @rest) = @_;
+        my $fh = tmpfile 'wbuf', $self->{sock}, 1 or
+                return drop $self, "tmpfile $!";
         $fh->autoflush(1);
         my $len = length($$bref) - $off;
         my $n = syswrite($fh, $$bref, $len, $off) //
-                return drop($self, "write ($len): $!");
-        $n == $len or return drop($self, "wrote $n < $len bytes");
+                return drop $self, "write ($len): $!";
+        $n == $len or return drop $self, "wrote $n < $len bytes";
+        @rest and (print $fh @rest or return drop $self, "print rest: $!");
         [ $fh, 0 ] # [1] = offset, [2] = length, not set by us
 }
 
@@ -490,112 +496,135 @@ it returns 1, caller should stop waiting for 'writable' events)
 
 =cut
 sub write {
-    my ($self, $data) = @_;
-
-    # nobody should be writing to closed sockets, but caller code can
-    # do two writes within an event, have the first fail and
-    # disconnect the other side (whose destructor then closes the
-    # calling object, but it's still in a method), and then the
-    # now-dead object does its second write.  that is this case.  we
-    # just lie and say it worked.  it'll be dead soon and won't be
-    # hurt by this lie.
-    my $sock = $self->{sock} or return 1;
-    my $ref = ref $data;
-    my $bref = $ref ? $data : \$data;
-    my $wbuf = $self->{wbuf};
-    if ($wbuf && scalar(@$wbuf)) { # already buffering, can't write more...
-        if ($ref eq 'CODE') {
-            push @$wbuf, $bref;
-        } else {
-            my $tmpio = $wbuf->[-1];
-            if (ref($tmpio) eq 'ARRAY' && !defined($tmpio->[2])) {
-                # append to tmp file buffer
-                $tmpio->[0]->print($$bref) or return drop($self, "print: $!");
-            } else {
-                my $tmpio = tmpio($self, $bref, 0) or return 0;
-                push @$wbuf, $tmpio;
-            }
-        }
-        return 0;
-    } elsif ($ref eq 'CODE') {
-        $bref->($self);
-        return 1;
-    } else {
-        my $to_write = length($$bref);
-        my $written = syswrite($sock, $$bref, $to_write);
-
-        if (defined $written) {
-            return 1 if $written == $to_write;
-            requeue($self); # runs: event_step -> flush_write
-        } elsif ($! == EAGAIN) {
-            my $ev = epbit($sock, EPOLLOUT) or return $self->close;
-            epwait($sock, $ev | EPOLLONESHOT);
-            $written = 0;
-        } else {
-            return $self->close;
-        }
-
-        # deal with EAGAIN or partial write:
-        my $tmpio = tmpio($self, $bref, $written) or return 0;
-
-        # wbuf may be an empty array if we're being called inside
-        # ->flush_write via CODE bref:
-        push @{$self->{wbuf}}, $tmpio; # autovivifies
-        return 0;
-    }
-}
-
-use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0;
-
-sub msg_more ($$) {
-    my $self = $_[0];
-    my $sock = $self->{sock} or return 1;
-    my $wbuf = $self->{wbuf};
-
-    if (MSG_MORE && (!defined($wbuf) || !scalar(@$wbuf)) &&
-                !$sock->can('stop_SSL')) {
-        my $n = send($sock, $_[1], MSG_MORE);
-        if (defined $n) {
-            my $nlen = length($_[1]) - $n;
-            return 1 if $nlen == 0; # all done!
-            # queue up the unwritten substring:
-            my $tmpio = tmpio($self, \($_[1]), $n) or return 0;
-            push @{$self->{wbuf}}, $tmpio; # autovivifies
-            epwait($sock, EPOLLOUT|EPOLLONESHOT);
-            return 0;
-        }
-    }
-
-    # don't redispatch into NNTPdeflate::write
-    PublicInbox::DS::write($self, \($_[1]));
+        my ($self, $data) = @_;
+
+        # nobody should be writing to closed sockets, but caller code can
+        # do two writes within an event, have the first fail and
+        # disconnect the other side (whose destructor then closes the
+        # calling object, but it's still in a method), and then the
+        # now-dead object does its second write.  that is this case.  we
+        # just lie and say it worked.  it'll be dead soon and won't be
+        # hurt by this lie.
+        my $sock = $self->{sock} or return 1;
+        my $ref = ref $data;
+        my $bref = $ref ? $data : \$data;
+        my $wbuf = $self->{wbuf};
+        if ($wbuf && scalar(@$wbuf)) { # already buffering, can't write more...
+                if ($ref eq 'CODE') {
+                        push @$wbuf, $bref;
+                } else {
+                        my $tmpio = $wbuf->[-1];
+                        if (ref($tmpio) eq 'ARRAY' && !defined($tmpio->[2])) {
+                                # append to tmp file buffer
+                                $tmpio->[0]->print($$bref) or
+                                        return drop($self, "print: $!");
+                        } else {
+                                $tmpio = tmpio $self, $bref, 0 or return 0;
+                                push @$wbuf, $tmpio;
+                        }
+                }
+                0;
+        } elsif ($ref eq 'CODE') {
+                $bref->($self);
+                1;
+        } else {
+                my $to_write = length $$bref;
+                my $w = syswrite $sock, $$bref, $to_write;
+
+                if (defined $w) {
+                        return 1 if $w == $to_write;
+                        requeue $self; # runs: event_step -> flush_write
+                } elsif ($! == EAGAIN) {
+                        my $ev = epbit $sock, EPOLLOUT or return $self->close;
+                        epwait $sock, $ev | EPOLLONESHOT;
+                        $w = 0;
+                } else {
+                        return $self->close;
+                }
+
+                # deal with EAGAIN or partial write:
+                my $tmpio = tmpio $self, $bref, $w or return 0;
+
+                # wbuf may be an empty array if we're being called inside
+                # ->flush_write via CODE bref:
+                push @{$self->{wbuf}}, $tmpio; # autovivifies
+                0;
+        }
 }
 
-sub epwait ($$) {
-        my ($io, $ev) = @_;
-        $Poller->ep_mod($io, $ev) and croak("EPOLL_CTL_MOD($io): $!");
+sub _iov_write ($$@) {
+        my ($self, $cb) = (shift, shift);
+        my ($tip, $tmpio, $s, $exp);
+        $s = $cb->($self->{sock}, @_);
+        if (defined $s) {
+                $exp = sum(map length, @_);
+                return 1 if $s == $exp;
+                while (@_) {
+                        $tip = shift;
+                        if ($s >= length($tip)) { # fully written
+                                $s -= length($tip);
+                        } else { # first partial write
+                                $tmpio = tmpio $self, \$tip, $s, @_ or return 0;
+                                last;
+                        }
+                }
+                $tmpio // return drop $self, "BUG: tmpio on $s != $exp";
+        } elsif ($! == EAGAIN) {
+                $tip = shift;
+                $tmpio = tmpio $self, \$tip, 0, @_ or return 0;
+        } else { # client disconnected
+                return $self->close;
+        }
+        push @{$self->{wbuf}}, $tmpio; # autovivifies
+        epwait $self->{sock}, EPOLLOUT|EPOLLONESHOT;
+        0;
+}
+
+sub msg_more ($@) {
+        my $self = shift;
+        my $sock = $self->{sock} or return 1;
+        my $wbuf = $self->{wbuf};
+        if ($sendmsg_more && (!defined($wbuf) || !scalar(@$wbuf)) &&
+                        !$sock->can('stop_SSL')) {
+                _iov_write $self, $sendmsg_more, @_;
+        } else { # don't redispatch into NNTPdeflate::write
+                PublicInbox::DS::write($self, join('', @_));
+        }
+}
+
+sub writev ($@) {
+        my $self = shift;
+        my $sock = $self->{sock} or return 1;
+        my $wbuf = $self->{wbuf};
+        if ($writev && (!defined($wbuf) || !scalar(@$wbuf)) &&
+                        !$sock->can('stop_SSL')) {
+                _iov_write $self, $writev, @_;
+        } else { # don't redispatch into NNTPdeflate::write
+                PublicInbox::DS::write($self, join('', @_));
+        }
 }
 
 # return true if complete, false if incomplete (or failure)
 sub accept_tls_step ($) {
-    my ($self) = @_;
-    my $sock = $self->{sock} or return;
-    return 1 if $sock->accept_SSL;
-    return $self->close if $! != EAGAIN;
-    my $ev = PublicInbox::TLS::epollbit() or return $self->close;
-    epwait($sock, $ev | EPOLLONESHOT);
-    unshift(@{$self->{wbuf}}, \&accept_tls_step); # autovivifies
-    0;
+        my ($self) = @_;
+        my $sock = $self->{sock} or return;
+        return 1 if $sock->accept_SSL;
+        return $self->close if $! != EAGAIN;
+        my $ev = PublicInbox::TLS::epollbit() or return $self->close;
+        epwait $sock, $ev | EPOLLONESHOT;
+        unshift @{$self->{wbuf}}, \&accept_tls_step; # autovivifies
+        0;
 }
 
 # return value irrelevant
 sub shutdn_tls_step ($) {
-    my ($self) = @_;
-    my $sock = $self->{sock} or return;
-    return $self->close if $sock->stop_SSL(SSL_fast_shutdown => 1);
-    return $self->close if $! != EAGAIN;
-    my $ev = PublicInbox::TLS::epollbit() or return $self->close;
-    epwait($sock, $ev | EPOLLONESHOT);
-    unshift(@{$self->{wbuf}}, \&shutdn_tls_step); # autovivifies
+        my ($self) = @_;
+        my $sock = $self->{sock} or return;
+        return $self->close if $sock->stop_SSL(SSL_fast_shutdown => 1);
+        return $self->close if $! != EAGAIN;
+        my $ev = PublicInbox::TLS::epollbit() or return $self->close;
+        epwait $sock, $ev | EPOLLONESHOT;
+        unshift @{$self->{wbuf}}, \&shutdn_tls_step; # autovivifies
 }
 
 # don't bother with shutdown($sock, 2), we don't fork+exec w/o CLOEXEC
@@ -603,7 +632,7 @@ sub shutdn_tls_step ($) {
 sub shutdn ($) {
         my ($self) = @_;
         my $sock = $self->{sock} or return;
-        $sock->can('stop_SSL') ? shutdn_tls_step($self) : $self->close;
+        $sock->can('stop_SSL') ? shutdn_tls_step $self : $self->close;
 }
 
 sub dflush {} # overridden by DSdeflate
diff --git a/lib/PublicInbox/DSdeflate.pm b/lib/PublicInbox/DSdeflate.pm
index 539adf0f..a9f9693b 100644
--- a/lib/PublicInbox/DSdeflate.pm
+++ b/lib/PublicInbox/DSdeflate.pm
@@ -91,12 +91,15 @@ sub do_read ($$$$) {
 }
 
 # override PublicInbox::DS::msg_more
-sub msg_more ($$) {
-        my $self = $_[0];
+sub msg_more ($@) {
+        my $self = shift;
 
         # $_[1] may be a reference or not for ->deflate
-        my $err = $zout->deflate($_[1], $zbuf);
-        $err == Z_OK or die "->deflate failed $err";
+        my $err;
+        for (@_) {
+                $err = $zout->deflate($_, $zbuf);
+                $err == Z_OK or die "->deflate failed $err";
+        }
         1;
 }
 
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 883dbea3..934197c0 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -543,13 +543,7 @@ sub _ibx_for ($$$) {
 sub _fd_constrained ($) {
         my ($self) = @_;
         $self->{-fd_constrained} //= do {
-                my $soft;
-                if (eval { require BSD::Resource; 1 }) {
-                        my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
-                        ($soft, undef) = BSD::Resource::getrlimit($NOFILE);
-                } else {
-                        chomp($soft = `sh -c 'ulimit -n'`);
-                }
+                my $soft = PublicInbox::Search::ulimit_n;
                 if (defined($soft)) {
                         # $want is an estimate
                         my $want = scalar(@{$self->{ibx_active}}) + 64;
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
index 78392990..acc2091c 100644
--- a/lib/PublicInbox/Gcf2.pm
+++ b/lib/PublicInbox/Gcf2.pm
@@ -11,7 +11,7 @@ use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
 use IO::Handle; # autoflush
 use PublicInbox::Git qw($ck_unlinked_packs);
 use PublicInbox::Lock;
-use autodie qw(close open seek truncate);
+use autodie qw(open seek truncate);
 
 BEGIN {
         my (%CFG, $c_src);
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index a9a821ad..32c11a59 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -61,7 +61,10 @@ sub git_exe () {
         return $GIT_EXE if $now < $next_check;
         $next_check = $now + 10;
         $GIT_EXE = which('git') // die "git not found in $ENV{PATH}";
-        my @st = stat(_) or die "stat($GIT_EXE): $!"; # can't do HiRes w/ _
+}
+
+sub git_version () {
+        my @st = stat(git_exe) or die "stat($GIT_EXE): $!";
         my $st = pack('dd', $st[0], $st[1]);
         if ($st ne $EXE_ST) {
                 my $v = run_qx([ $GIT_EXE, '--version' ]);
@@ -71,11 +74,6 @@ sub git_exe () {
                 $GIT_VER = eval("v$1") // die "BUG: bad vstring: $1 ($v)";
                 $EXE_ST = $st;
         }
-        $GIT_EXE;
-}
-
-sub git_version () {
-        git_exe;
         $GIT_VER;
 }
 
@@ -108,9 +106,10 @@ sub new {
 sub git_path ($$) {
         my ($self, $path) = @_;
         $self->{-git_path}->{$path} //= do {
-                my $d = "$self->{git_dir}/$path";
-                if (-e $d) {
-                        $d;
+                my $d = $self->{git_dir};
+                my $f = "$d/$path";
+                if (-d "$d/objects") {
+                        $f;
                 } else {
                         local $/ = "\n";
                         my $rdr = { 2 => \my $err };
@@ -119,7 +118,7 @@ sub git_path ($$) {
                         chomp $s;
 
                         # git prior to 2.5.0 did not understand --git-path
-                        $s eq "--git-path\n$path" ? $d : $s;
+                        $s eq "--git-path\n$path" ? $f : $s;
                 }
         };
 }
@@ -208,7 +207,7 @@ sub cat_async_retry ($$) {
                 $oid = \$oid if !@$new_inflight; # to indicate oid retried
                 push @$new_inflight, $oid, $cb, $arg;
         }
-        $sock->close if $sock; # only safe once old_inflight is empty
+        undef $sock; # gcf_drain may run from PublicInbox::IO::DESTROY
         cat_async_step($self, $new_inflight); # take one step
 }
 
@@ -639,7 +638,8 @@ sub event_step {
         my ($self) = @_;
         my $inflight = gcf_inflight($self);
         if ($inflight && @$inflight) {
-                $self->cat_async_step($inflight);
+                eval { $self->cat_async_step($inflight) };
+                warn "E: $self->{git_dir}: $@" if $@;
                 return $self->close unless $self->{sock};
                 # don't loop here to keep things fair, but we must requeue
                 # if there's already-read data in pi_io_rbuf
@@ -664,10 +664,9 @@ sub watch_async ($) {
 
 sub close {
         my ($self) = @_;
-        my $sock = $self->{sock};
         delete @$self{qw(-bc err_c inflight)};
         delete($self->{epwatch}) ? $self->SUPER::close : delete($self->{sock});
-        $sock->close if $sock; # calls gcf_drain via awaitpid
+        # gcf_drain may run from PublicInbox::IO::DESTROY
 }
 
 package PublicInbox::GitCheck; # only for git <2.36
diff --git a/lib/PublicInbox/GitCredential.pm b/lib/PublicInbox/GitCredential.pm
index bb225ff3..cf5a2213 100644
--- a/lib/PublicInbox/GitCredential.pm
+++ b/lib/PublicInbox/GitCredential.pm
@@ -4,13 +4,14 @@
 # git-credential wrapper with built-in .netrc fallback
 package PublicInbox::GitCredential;
 use v5.12;
+use PublicInbox::Git qw(git_exe);
 use PublicInbox::Spawn qw(popen_rd);
 use autodie qw(close pipe);
 
 sub run ($$;$) {
         my ($self, $op, $lei) = @_;
         my ($in_r, $in_w, $out_r);
-        my $cmd = [ qw(git credential), $op ];
+        my $cmd = [ git_exe, 'credential', $op ];
         pipe($in_r, $in_w);
         if ($lei) { # we'll die if disconnected:
                 pipe($out_r, my $out_w);
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 396aa783..ac610d4b 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -106,7 +106,9 @@ sub serve_smart {
         $env{PATH_TRANSLATED} = "$git->{git_dir}/$path";
         my $rdr = input_prepare($env) or return r(500);
         $rdr->{quiet} = 1;
-        my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr);
+        my $cmd = $git->cmd('http-backend');
+        splice @$cmd, 1, 0, '-c', 'safe.directory=*';
+        my $qsp = PublicInbox::Qspawn->new($cmd, \%env, $rdr);
         $qsp->psgi_yield($env, $limiter, \&ghb_parse_hdr, $env, $git, $path);
 }
 
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 8b630f25..a2e82a2d 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -11,6 +11,8 @@
 # async_eml callbacks only run when a blob arrives from git.
 #
 # We continue to support getline+close for generic PSGI servers.
+# Note: we favor gzip in Perl (as opposed to nginx || varnish) to
+# reduce IPC memory traffic
 package PublicInbox::GzipFilter;
 use strict;
 use parent qw(Exporter);
@@ -21,7 +23,9 @@ use PublicInbox::GitAsyncCat;
 use Carp qw(carp);
 
 our @EXPORT_OK = qw(gzf_maybe);
-my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
+# Compress::Raw::Zlib uses MAX_MEM_LEVEL (9) while zlib DEF_MEM_LEVEL is 8;
+# choose the zlib default because C:R:Z is excessive.
+my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1, -MemLevel => 8);
 my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip);
 
 sub new { bless {}, shift } # qspawn filter
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index 7162732e..73632aee 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -21,6 +21,7 @@
 package PublicInbox::HTTP;
 use strict;
 use parent qw(PublicInbox::DS);
+use bytes qw(length);
 use Fcntl qw(:seek);
 use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl
 use Plack::Util;
@@ -36,6 +37,7 @@ use constant {
         CHUNK_MAX_HDR => 256,
 };
 use Errno qw(EAGAIN);
+use PublicInbox::Compat qw(sum0);
 
 # Use the same configuration parameter as git since this is primarily
 # a slow-client sponge for git-http-backend
@@ -47,7 +49,7 @@ open(my $null_io, '<', '/dev/null') or die "open /dev/null: $!";
 {
         my @n = stat($null_io) or die "stat(/dev/null): $!";
         my @i = stat(STDIN) or die "stat(STDIN): $!";
-        $null_io = *STDIN{IO} if "@n[0, 1]" eq "@i[0, 1]";
+        $null_io = \*STDIN if "@n[0, 1]" eq "@i[0, 1]";
 }
 
 my $http_date;
@@ -165,7 +167,7 @@ sub app_dispatch {
         }
 }
 
-sub response_header_write {
+sub response_header_write ($$$) {
         my ($self, $env, $res) = @_;
         my $proto = $env->{SERVER_PROTOCOL} or return; # HTTP/0.9 :P
         my $status = $res->[0];
@@ -188,7 +190,13 @@ sub response_header_write {
         my $conn = $env->{HTTP_CONNECTION} || '';
         my $term = defined($len) || $chunked;
         my $prot_persist = ($proto eq 'HTTP/1.1') && ($conn !~ /\bclose\b/i);
-        my $alive;
+        my ($alive, $res_body);
+        if (!$term && ref($res->[2]) eq 'ARRAY') {
+                ($res_body, $res->[2]) = ($res->[2], []);
+                $len = sum0(map length, @$res_body);
+                $h .= "Content-Length: $len\r\n";
+                $term = 1;
+        }
         if (!$term && $prot_persist) { # auto-chunk
                 $chunked = $alive = 2;
                 $alive = 3 if $env->{REQUEST_METHOD} eq 'HEAD';
@@ -203,7 +211,9 @@ sub response_header_write {
         }
         $h .= 'Date: ' . http_date() . "\r\n\r\n";
 
-        if (($len || $chunked) && $env->{REQUEST_METHOD} ne 'HEAD') {
+        if ($res_body) {
+                $self->writev($h, @$res_body);
+        } elsif (($len || $chunked) && $env->{REQUEST_METHOD} ne 'HEAD') {
                 msg_more($self, $h);
         } else {
                 $self->write(\$h);
@@ -213,14 +223,9 @@ sub response_header_write {
 
 # middlewares such as Deflater may write empty strings
 sub chunked_write ($$) {
-        my $self = $_[0];
-        return if $_[1] eq '';
-        msg_more($self, sprintf("%x\r\n", length($_[1])));
-        msg_more($self, $_[1]);
-
-        # use $self->write(\"\n\n") if you care about real-time
-        # streaming responses, public-inbox WWW does not.
-        msg_more($self, "\r\n");
+        my ($self, $buf) = @_;
+        $buf eq '' or
+                msg_more $self, sprintf("%x\r\n", length($buf)), $buf, "\r\n";
 }
 
 sub identity_write ($$) {
@@ -459,7 +464,7 @@ use v5.12;
 sub write {
         # ([$http], $buf) = @_;
         PublicInbox::HTTP::chunked_write($_[0]->[0], $_[1]);
-        $_[0]->[0]->{sock} ? length($_[1]) : undef;
+        $_[0]->[0]->{sock} ? bytes::length($_[1]) : undef;
 }
 
 sub close {
@@ -474,7 +479,7 @@ our @ISA = qw(PublicInbox::HTTP::Chunked);
 sub write {
         # ([$http], $buf) = @_;
         PublicInbox::HTTP::identity_write($_[0]->[0], $_[1]);
-        $_[0]->[0]->{sock} ? length($_[1]) : undef;
+        $_[0]->[0]->{sock} ? bytes::length($_[1]) : undef;
 }
 
 1;
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index b12533cb..0eb21b6a 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -605,8 +605,7 @@ sub fetch_blob_cb { # called by git->cat_async via ibx_async_cat
 
 sub emit_rfc822 {
         my ($self, $k, undef, $bref) = @_;
-        $self->msg_more(" $k {" . length($$bref)."}\r\n");
-        $self->msg_more($$bref);
+        $self->msg_more(" $k {" . length($$bref)."}\r\n", $$bref);
 }
 
 # Mail::IMAPClient::message_string cares about this by default,
@@ -626,20 +625,18 @@ sub emit_flags { $_[0]->msg_more(' FLAGS ()') }
 
 sub emit_envelope {
         my ($self, undef, undef, undef, $eml) = @_;
-        $self->msg_more(' ENVELOPE '.eml_envelope($eml));
+        $self->msg_more(' ENVELOPE ', eml_envelope($eml));
 }
 
 sub emit_rfc822_header {
         my ($self, $k, undef, undef, $eml) = @_;
-        $self->msg_more(" $k {".length(${$eml->{hdr}})."}\r\n");
-        $self->msg_more(${$eml->{hdr}});
+        $self->msg_more(" $k {".length(${$eml->{hdr}})."}\r\n", ${$eml->{hdr}});
 }
 
 # n.b. this is sorted to be after any emit_eml_new ops
 sub emit_rfc822_text {
         my ($self, $k, undef, $bref) = @_;
-        $self->msg_more(" $k {".length($$bref)."}\r\n");
-        $self->msg_more($$bref);
+        $self->msg_more(" $k {".length($$bref)."}\r\n", $$bref);
 }
 
 sub emit_bodystructure {
@@ -970,8 +967,7 @@ sub partial_emit ($$$) {
                 } else {
                         $len = length($str);
                 }
-                $self->msg_more(" $k {$len}\r\n");
-                $self->msg_more($str);
+                $self->msg_more(" $k {$len}\r\n", $str);
         }
 }
 
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index fefc282a..2e193d46 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -8,6 +8,7 @@
 package PublicInbox::Import;
 use v5.12;
 use parent qw(PublicInbox::Lock);
+use PublicInbox::Git qw(git_exe);
 use PublicInbox::Spawn qw(run_die run_qx spawn);
 use PublicInbox::MID qw(mids mid2path);
 use PublicInbox::Address;
@@ -24,7 +25,7 @@ use PublicInbox::IO qw(read_all);
 
 sub default_branch () {
         state $default_branch = do {
-                my $h = run_qx([qw(git config --global init.defaultBranch)],
+                my $h = run_qx([git_exe,qw(config --global init.defaultBranch)],
                                  { GIT_CONFIG => undef });
                 chomp $h;
                 $h eq '' ? 'refs/heads/master' : "refs/heads/$h";
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index e9a0de6c..c5146428 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -22,6 +22,7 @@ use PublicInbox::Syscall qw(EPOLLIN);
 use PublicInbox::Spawn qw(run_wait popen_rd run_qx);
 use PublicInbox::Lock;
 use PublicInbox::Eml;
+use PublicInbox::Git qw(git_exe);
 use PublicInbox::Import;
 use PublicInbox::ContentHash qw(git_sha);
 use PublicInbox::OnDestroy;
@@ -506,12 +507,12 @@ sub x_it ($$) {
 
 sub err ($;@) {
         my $self = shift;
-        my $err = $self->{2} // ($self->{pgr} // [])->[2] // *STDERR{GLOB};
+        my $err = $self->{2} // ($self->{pgr} // [])->[2] // \*STDERR;
         my @eor = (substr($_[-1]//'', -1, 1) eq "\n" ? () : ("\n"));
         print $err @_, @eor and return;
         my $old_err = delete $self->{2};
         $old_err->close if $! == EPIPE && $old_err;
-        $err = $self->{2} = ($self->{pgr} // [])->[2] // *STDERR{GLOB};
+        $err = $self->{2} = ($self->{pgr} // [])->[2] // \*STDERR;
         print $err @_, @eor or print STDERR @_, @eor;
 }
 
@@ -1098,7 +1099,7 @@ sub path_to_fd {
 # caller needs to "-t $self->{1}" to check if tty
 sub start_pager {
         my ($self, $new_env) = @_;
-        chomp(my $pager = run_qx([qw(git var GIT_PAGER)]));
+        chomp(my $pager = run_qx([git_exe, qw(var GIT_PAGER)]));
         warn "`git var PAGER' error: \$?=$?" if $?;
         return if $pager eq 'cat' || $pager eq '';
         $new_env //= {};
@@ -1556,7 +1557,7 @@ sub sto_barrier_request {
                 eval { $lei->{sto}->wq_do('schedule_commit', $n) };
         } else {
                 my $s = ($wq ? $wq->{lei_sock} : undef) // $lei->{sock};
-                my $errfh = $lei->{2} // *STDERR{GLOB};
+                my $errfh = $lei->{2} // \*STDERR;
                 my @io = $s ? ($errfh, $s) : ($errfh);
                 eval { $lei->{sto}->wq_io_do('barrier', \@io, 1) };
         }
diff --git a/lib/PublicInbox/LeiBlob.pm b/lib/PublicInbox/LeiBlob.pm
index 7b2ea434..31936c36 100644
--- a/lib/PublicInbox/LeiBlob.pm
+++ b/lib/PublicInbox/LeiBlob.pm
@@ -10,14 +10,14 @@ use parent qw(PublicInbox::IPC);
 use PublicInbox::Spawn qw(run_wait run_qx which);
 use PublicInbox::DS;
 use PublicInbox::Eml;
-use PublicInbox::Git;
+use PublicInbox::Git qw(git_exe);
 use PublicInbox::IO qw(read_all);
 
 sub get_git_dir ($$) {
         my ($lei, $d) = @_;
         return $d if -d "$d/objects" && -d "$d/refs" && -e "$d/HEAD";
 
-        my $cmd = [ qw(git rev-parse --git-dir) ];
+        my $cmd = [ git_exe, qw(rev-parse --git-dir) ];
         my $opt = { '-C' => $d };
         if (defined($lei->{opt}->{cwd})) { # --cwd used, report errors
                 $opt->{2} = $lei->{2};
diff --git a/lib/PublicInbox/LeiConfig.pm b/lib/PublicInbox/LeiConfig.pm
index a50ff2b6..ae12249f 100644
--- a/lib/PublicInbox/LeiConfig.pm
+++ b/lib/PublicInbox/LeiConfig.pm
@@ -3,6 +3,7 @@
 package PublicInbox::LeiConfig; # subclassed by LeiEditSearch
 use v5.12;
 use PublicInbox::PktOp;
+use PublicInbox::Git qw(git_exe);
 use Fcntl qw(SEEK_SET);
 use autodie qw(open seek);
 use PublicInbox::IO qw(read_all);
@@ -11,7 +12,7 @@ sub cfg_do_edit ($;$) {
         my ($self, $reason) = @_;
         my $lei = $self->{lei};
         $lei->pgr_err($reason) if defined $reason;
-        my $cmd = [ qw(git config --edit -f), $self->{-f} ];
+        my $cmd = [ git_exe, qw(config --edit -f), $self->{-f} ];
         my $env = { GIT_CONFIG => $self->{-f} };
         $self->cfg_edit_begin if $self->can('cfg_edit_begin');
         # run in script/lei foreground
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index c388f7dc..0a6aba82 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -5,6 +5,7 @@
 package PublicInbox::LeiInput;
 use v5.12;
 use PublicInbox::DS;
+use PublicInbox::Git qw(git_exe);
 use PublicInbox::Spawn qw(which popen_rd);
 use PublicInbox::InboxWritable qw(eml_from_path);
 
@@ -252,7 +253,8 @@ sub input_path_url {
                 each_ibx_eml($self, $esrch, @args);
         } elsif ($self->{missing_ok} && !-e $input) { # don't ->fail
                 if ($lei->{cmd} eq 'p2q') {
-                        my $fp = [ qw(git format-patch --stdout -1), $input ];
+                        my $fp = [ git_exe, qw(format-patch --stdout -1),
+                                        $input ];
                         my $rdr = { 2 => $lei->{2} };
                         my $fh = popen_rd($fp, undef, $rdr);
                         eval { $self->input_fh('eml', $fh, $input, @args) };
diff --git a/lib/PublicInbox/LeiMailDiff.pm b/lib/PublicInbox/LeiMailDiff.pm
index af6ecf82..270de507 100644
--- a/lib/PublicInbox/LeiMailDiff.pm
+++ b/lib/PublicInbox/LeiMailDiff.pm
@@ -7,13 +7,14 @@ package PublicInbox::LeiMailDiff;
 use v5.12;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput PublicInbox::MailDiff);
 use PublicInbox::Spawn qw(run_wait);
+use PublicInbox::Git qw(git_exe);
 require PublicInbox::LeiRediff;
 
 sub diff_a ($$) {
         my ($self, $eml) = @_;
         my $dir = "$self->{tmp}/N".(++$self->{nr});
         $self->dump_eml($dir, $eml);
-        my $cmd = [ qw(git diff --no-index) ];
+        my $cmd = [ git_exe, qw(diff --no-index) ];
         my $lei = $self->{lei};
         PublicInbox::LeiRediff::_lei_diff_prepare($lei, $cmd);
         push @$cmd, qw(-- a), "N$self->{nr}";
diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index 9ae9dcdb..c27b6f86 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -5,7 +5,7 @@
 package PublicInbox::LeiSavedSearch;
 use v5.12;
 use parent qw(PublicInbox::Lock);
-use PublicInbox::Git;
+use PublicInbox::Git qw(git_exe);
 use PublicInbox::OverIdx;
 use PublicInbox::LeiSearch;
 use PublicInbox::Config;
@@ -176,7 +176,7 @@ sub description { $_[0]->{qstr} } # for WWW
 sub cfg_set { # called by LeiXSearch
         my ($self, @args) = @_;
         my $lk = $self->lock_for_scope; # git-config doesn't wait
-        run_die([qw(git config -f), $self->{'-f'}, @args]);
+        run_die([git_exe, qw(config -f), $self->{'-f'}, @args]);
 }
 
 # drop-in for LeiDedupe API
@@ -263,8 +263,6 @@ sub reset_dedupe {
 
 sub mm { undef }
 
-sub altid_map { {} }
-
 sub cloneurl { [] }
 
 # find existing directory containing a `lei.saved-search' file based on
diff --git a/lib/PublicInbox/LeiViewText.pm b/lib/PublicInbox/LeiViewText.pm
index c7d72c71..6510b19e 100644
--- a/lib/PublicInbox/LeiViewText.pm
+++ b/lib/PublicInbox/LeiViewText.pm
@@ -12,6 +12,7 @@ use PublicInbox::View;
 use PublicInbox::Hval;
 use PublicInbox::ViewDiff;
 use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Git qw(git_exe);
 use Term::ANSIColor;
 use POSIX ();
 use PublicInbox::Address;
@@ -72,7 +73,7 @@ sub new {
         my $self = bless { %{$lei->{opt}}, -colored => \&uncolored }, $cls;
         $self->{-quote_reply} = 1 if $fmt eq 'reply';
         return $self unless $self->{color} //= -t $lei->{1};
-        my @cmd = qw(git config -z --includes -l); # reuse normal git config
+        my @cmd = (git_exe, qw(config -z --includes -l)); # reuse normal git cfg
         my $r = popen_rd(\@cmd, undef, { 2 => $lei->{2} });
         my $cfg = PublicInbox::Config::config_fh_parse($r, "\0", "\n");
         if (!$r->close) {
diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm
index 125360fe..ce268bfb 100644
--- a/lib/PublicInbox/MailDiff.pm
+++ b/lib/PublicInbox/MailDiff.pm
@@ -7,6 +7,7 @@ use PublicInbox::ContentHash qw(content_digest);
 use PublicInbox::MsgIter qw(msg_part_text);
 use PublicInbox::ViewDiff qw(flush_diff);
 use PublicInbox::GitAsyncCat;
+use PublicInbox::Git qw(git_exe);
 use PublicInbox::ContentDigestDbg;
 use PublicInbox::Qspawn;
 use PublicInbox::IO qw(write_file);
@@ -81,7 +82,7 @@ sub do_diff {
         my $n = 'N'.(++$self->{nr});
         my $dir = "$self->{tmp}/$n";
         $self->dump_eml($dir, $eml);
-        my $cmd = [ qw(git diff --no-index --no-color -- a), $n ];
+        my $cmd = [ git_exe, qw(diff --no-index --no-color -- a), $n ];
         my $opt = { -C => "$self->{tmp}", quiet => 1 };
         my $qsp = PublicInbox::Qspawn->new($cmd, undef, $opt);
         $qsp->psgi_qx($self->{ctx}->{env}, undef, \&emit_msg_diff, $self);
diff --git a/lib/PublicInbox/MultiGit.pm b/lib/PublicInbox/MultiGit.pm
index b7691806..32bb3588 100644
--- a/lib/PublicInbox/MultiGit.pm
+++ b/lib/PublicInbox/MultiGit.pm
@@ -7,6 +7,7 @@ use strict;
 use v5.10.1;
 use PublicInbox::Spawn qw(run_die run_qx);
 use PublicInbox::Import;
+use PublicInbox::Git qw(git_exe);
 use File::Temp 0.19;
 use List::Util qw(max);
 use PublicInbox::IO qw(read_all);
@@ -110,11 +111,12 @@ sub epoch_cfg_set {
         my ($self, $epoch_nr) = @_;
         my $f = epoch_dir($self)."/$epoch_nr.git/config";
         my $v = "../../$self->{all}/config";
+        my @cmd = (git_exe, qw(config -f), $f, 'include.path');
         if (-r $f) {
-                chomp(my $x = run_qx([qw(git config -f), $f, 'include.path']));
+                chomp(my $x = run_qx(\@cmd));
                 return if $x eq $v;
         }
-        run_die([qw(git config -f), $f, 'include.path', $v ]);
+        run_die [@cmd, $v];
 }
 
 sub add_epoch {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index e5c5d6ab..eb5e67ba 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -54,6 +54,9 @@ use constant {
         #
         #      v1.6.0 adds BYTES, UID and THREADID values
         SCHEMA_VERSION => 15,
+
+        # we may have up to 8 FDs per shard (depends on Xapian *shrug*)
+        SHARD_COST => 8,
 };
 
 use PublicInbox::Smsg;
@@ -477,7 +480,7 @@ sub async_mset {
         my ($self, $qry_str, $opt, $cb, @args) = @_;
         if ($XHC) { # unconditionally retrieving pct + rank for now
                 xdb($self); # populate {nshards}
-                my @margs = ($self->xh_args, xh_opt($self, $opt));
+                my @margs = ($self->xh_args, xh_opt($self, $opt), '--');
                 my $ret = eval {
                         my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str);
                         PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args);
@@ -729,4 +732,17 @@ sub get_doc ($$) {
         }
 }
 
+# not sure where best to put this...
+sub ulimit_n () {
+        my $n;
+        if (eval { require BSD::Resource; 1 }) {
+                my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
+                ($n, undef) = BSD::Resource::getrlimit($NOFILE);
+        } else {
+                require PublicInbox::Spawn;
+                $n = PublicInbox::Spawn::run_qx([qw(/bin/sh -c), 'ulimit -n']);
+        }
+        $n;
+}
+
 1;
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 00ae9fac..672c53ad 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -33,19 +33,24 @@ sub thread {
         # can be shakier if somebody used In-Reply-To with multiple, disparate
         # messages.  So, take the client Date: into account since we can't
         # always determine ordering when somebody uses multiple In-Reply-To.
+        my (%dedupe, $mid);
         my @kids = sort { $a->{ds} <=> $b->{ds} } grep {
                 # this delete saves around 4K across 1K messages
                 # TODO: move this to a more appropriate place, breaks tests
                 # if we do it during psgi_cull
                 delete $_->{num};
                 bless $_, 'PublicInbox::SearchThread::Msg';
-                if (exists $id_table{$_->{mid}}) {
+                $mid = $_->{mid};
+                if (exists $id_table{$mid}) {
                         $_->{children} = [];
                         push @imposters, $_; # we'll deal with them later
                         undef;
                 } else {
                         $_->{children} = {}; # will become arrayref later
-                        $id_table{$_->{mid}} = $_;
+                        %dedupe = ($mid => undef);
+                        ($mid) = keys %dedupe;
+                        $_->{mid} = $mid;
+                        $id_table{$mid} = $_;
                         defined($_->{references});
                 }
         } @$msgs;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 9919e25c..f056dddf 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -154,9 +154,15 @@ sub path2inc ($) {
         if (my $short = $rmap_inc{$full}) {
                 return $short;
         } elsif (!scalar(keys %rmap_inc) && -e $full) {
-                %rmap_inc = map {; "$INC{$_}" => $_ } keys %INC;
+                # n.b. $INC{'PublicInbox::Gcf2'} is undef if libgit2-dev
+                # doesn't exist
+                my $f;
+                %rmap_inc = map {;
+                        $f = $INC{$_};
+                        defined $f ? ($f, $_) : ();
+                } keys %INC;
                 # fall back to basename as last resort
-                $rmap_inc{$full} // (split('/', $full))[-1];
+                $rmap_inc{$full} // (split(m'/', $full))[-1];
         } else {
                 $full;
         }
@@ -310,13 +316,12 @@ sub mset_thread {
         my $rootset = PublicInbox::SearchThread::thread($msgs,
                 $r ? \&sort_relevance : \&PublicInbox::View::sort_ds,
                 $ctx);
-        my $skel = search_nav_bot($ctx, $mset, $q).'<pre>'. <<EOM;
+        $ctx->{skel} = [ search_nav_bot($ctx, $mset, $q).'<pre>'. <<EOM ];
 -- pct% links below jump to the message on this page, permalinks otherwise --
 EOM
         $ctx->{-upfx} = '';
         $ctx->{anchor_idx} = 1;
         $ctx->{cur_level} = 0;
-        $ctx->{skel} = \$skel;
         $ctx->{mapping} = {};
         $ctx->{searchview} = 1;
         $ctx->{prev_attr} = '';
@@ -326,7 +331,7 @@ EOM
         # reduce hash lookups in skel_dump
         $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
         PublicInbox::View::walk_thread($rootset, $ctx,
-                \&PublicInbox::View::pre_thread);
+                \&PublicInbox::View::pre_thread); # pushes to ctx->{skel}
 
         # link $INBOX_DIR/description text to "recent" view around
         # the newest message in this result set:
@@ -343,7 +348,7 @@ sub mset_thread_i {
         print { $ctx->zfh } $ctx->html_top if exists $ctx->{-html_tip};
         $eml and return PublicInbox::View::eml_entry($ctx, $eml);
         my $smsg = shift @{$ctx->{msgs}} or
-                print { $ctx->zfh } ${delete($ctx->{skel})};
+                print { $ctx->zfh } @{delete($ctx->{skel})};
         $smsg;
 }
 
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 7984af43..898ca72d 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -13,7 +13,7 @@ use v5.10.1;
 use File::Temp 0.19 (); # 0.19 for ->newdir
 use autodie qw(mkdir);
 use Fcntl qw(SEEK_SET);
-use PublicInbox::Git qw(git_unquote git_quote);
+use PublicInbox::Git qw(git_unquote git_quote git_exe);
 use PublicInbox::IO qw(write_file);
 use PublicInbox::MsgIter qw(msg_part_text);
 use PublicInbox::Qspawn;
@@ -176,7 +176,7 @@ sub extract_diff ($$) {
                 (?:^---\x20$FN$LF)
 
                 # "+++ b/foo.c" sets post-filename ($11) in case
-                # $3 is missing
+                # $3 is missing or truncated
                 (?:^\+{3}\x20$FN$LF)
 
                 # the meat of the diff, including "^\\No newline ..."
@@ -193,7 +193,8 @@ sub extract_diff ($$) {
                 mode_a => $5 // $8 // $4, # new (file) // unchanged // old
         };
         my $path_a = $2 // $10;
-        my $path_b = $3 // $11;
+        my $path_b = defined $11 && defined $3 && length $11 > length $3 ?
+                        $11 // $3 : $3 // $11;
         my $patch = $9;
 
         # don't care for leading 'a/' and 'b/'
@@ -293,7 +294,7 @@ sub prepare_index ($) {
 
         dbg($self, 'preparing index');
         my $rdr = { 0 => $in };
-        my $cmd = [ qw(git update-index -z --index-info) ];
+        my $cmd = [ git_exe, qw(update-index -z --index-info) ];
         my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr);
         $path_a = git_quote($path_a);
         $self->{-msg} = "index prepared:\n$mode_a $oid_full\t$path_a";
@@ -473,7 +474,7 @@ sub apply_result ($$) { # qx_cb
                 skip_identical($self, $patches, $di->{oid_b});
         }
 
-        my @cmd = qw(git ls-files -s -z);
+        my @cmd = (git_exe, qw(ls-files -s -z));
         my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env});
         $self->{-cur_di} = $di;
         qsp_qx $self, $qsp, \&ls_files_result;
@@ -484,7 +485,7 @@ sub do_git_apply ($) {
         my $patches = $self->{patches};
 
         # we need --ignore-whitespace because some patches are CRLF
-        my @cmd = (qw(git apply --cached --ignore-whitespace
+        my @cmd = (git_exe, qw(apply --cached --ignore-whitespace
                         --unidiff-zero --whitespace=warn --verbose));
         my $len = length(join(' ', @cmd));
         my $di; # keep track of the last one for "git ls-files"
diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm
index 4cbe9623..ebcedb89 100644
--- a/lib/PublicInbox/Syscall.pm
+++ b/lib/PublicInbox/Syscall.pm
@@ -22,7 +22,8 @@ use POSIX qw(ENOENT ENOSYS EINVAL O_NONBLOCK);
 use Socket qw(SOL_SOCKET SCM_RIGHTS);
 use Config;
 our %SIGNUM = (WINCH => 28); # most Linux, {Free,Net,Open}BSD, *Darwin
-our ($INOTIFY, %PACK);
+our ($INOTIFY, %CONST);
+use List::Util qw(sum);
 
 # $VERSION = '0.25'; # Sys::Syscall version
 our @EXPORT_OK = qw(epoll_ctl epoll_create epoll_wait
@@ -60,7 +61,7 @@ our ($SYS_epoll_create,
         $SYS_recvmsg);
 
 my $SYS_fstatfs; # don't need fstatfs64, just statfs.f_type
-my ($FS_IOC_GETFLAGS, $FS_IOC_SETFLAGS);
+my ($FS_IOC_GETFLAGS, $FS_IOC_SETFLAGS, $SYS_writev);
 my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC
 our $no_deprecated = 0;
 
@@ -94,6 +95,7 @@ if ($^O eq "linux") {
                 $SYS_fstatfs = 100;
                 $SYS_sendmsg = 370;
                 $SYS_recvmsg = 372;
+                $SYS_writev = 146;
                 $INOTIFY = { # usage: `use constant $PublicInbox::Syscall::INOTIFY'
                         SYS_inotify_init1 => 332,
                         SYS_inotify_add_watch => 292,
@@ -110,6 +112,7 @@ if ($^O eq "linux") {
                 $SYS_fstatfs = 138;
                 $SYS_sendmsg = 46;
                 $SYS_recvmsg = 47;
+                $SYS_writev = 20;
                 $INOTIFY = {
                         SYS_inotify_init1 => 294,
                         SYS_inotify_add_watch => 254,
@@ -126,6 +129,7 @@ if ($^O eq "linux") {
                 $SYS_fstatfs = 138;
                 $SYS_sendmsg = 0x40000206;
                 $SYS_recvmsg = 0x40000207;
+                $SYS_writev = 0x40000204;
                 $FS_IOC_GETFLAGS = 0x80046601;
                 $FS_IOC_SETFLAGS = 0x40046602;
                 $INOTIFY = {
@@ -163,6 +167,7 @@ if ($^O eq "linux") {
                 $SYS_fstatfs = 100;
                 $SYS_sendmsg = 341;
                 $SYS_recvmsg = 342;
+                $SYS_writev = 146;
                 $FS_IOC_GETFLAGS = 0x40086601;
                 $FS_IOC_SETFLAGS = 0x80086602;
                 $INOTIFY = {
@@ -178,6 +183,7 @@ if ($^O eq "linux") {
                 $SYS_signalfd4 = 313;
                 $SYS_renameat2 //= 357;
                 $SYS_fstatfs = 100;
+                $SYS_writev = 146;
                 $FS_IOC_GETFLAGS = 0x40086601;
                 $FS_IOC_SETFLAGS = 0x80086602;
         } elsif ($machine =~ m/^s390/) { # untested, no machine on cfarm
@@ -190,6 +196,7 @@ if ($^O eq "linux") {
                 $SYS_fstatfs = 100;
                 $SYS_sendmsg = 370;
                 $SYS_recvmsg = 372;
+                $SYS_writev = 146;
         } elsif ($machine eq 'ia64') { # untested, no machine on cfarm
                 $SYS_epoll_create = 1243;
                 $SYS_epoll_ctl = 1244;
@@ -215,6 +222,7 @@ if ($^O eq "linux") {
                 $SYS_fstatfs = 44;
                 $SYS_sendmsg = 211;
                 $SYS_recvmsg = 212;
+                $SYS_writev = 66;
                 $INOTIFY = {
                         SYS_inotify_init1 => 26,
                         SYS_inotify_add_watch => 27,
@@ -232,6 +240,7 @@ if ($^O eq "linux") {
                 $SYS_fstatfs = 100;
                 $SYS_sendmsg = 296;
                 $SYS_recvmsg = 297;
+                $SYS_writev = 146;
         } elsif ($machine =~ m/^mips64/) { # cfarm only has 32-bit userspace
                 $SYS_epoll_create = 5207;
                 $SYS_epoll_ctl = 5208;
@@ -242,6 +251,7 @@ if ($^O eq "linux") {
                 $SYS_fstatfs = 5135;
                 $SYS_sendmsg = 5045;
                 $SYS_recvmsg = 5046;
+                $SYS_writev = 5019;
                 $FS_IOC_GETFLAGS = 0x40046601;
                 $FS_IOC_SETFLAGS = 0x80046602;
         } elsif ($machine =~ m/^mips/) { # 32-bit, tested on mips64 cfarm host
@@ -254,6 +264,7 @@ if ($^O eq "linux") {
                 $SYS_fstatfs = 4100;
                 $SYS_sendmsg = 4179;
                 $SYS_recvmsg = 4177;
+                $SYS_writev = 4146;
                 $FS_IOC_GETFLAGS = 0x40046601;
                 $FS_IOC_SETFLAGS = 0x80046602;
                 $SIGNUM{WINCH} = 20;
@@ -286,11 +297,13 @@ EOM
 # (I'm assuming Dragonfly copies FreeBSD, here, too)
         $SYS_recvmsg = 27;
         $SYS_sendmsg = 28;
+        $SYS_writev = 121;
 }
 
 BEGIN {
         if ($^O eq 'linux') {
-                %PACK = (
+                %CONST = (
+                        MSG_MORE => 0x8000,
                         TMPL_cmsg_len => TMPL_size_t,
                         # cmsg_len, cmsg_level, cmsg_type
                         SIZEOF_cmsghdr => SIZEOF_int * 2 + SIZEOF_size_t,
@@ -303,7 +316,7 @@ BEGIN {
                                 'i', # msg_flags
                 );
         } elsif ($^O =~ /\A(?:freebsd|openbsd|netbsd|dragonfly)\z/) {
-                %PACK = (
+                %CONST = (
                         TMPL_cmsg_len => 'L', # socklen_t
                         SIZEOF_cmsghdr => SIZEOF_int * 3,
                         CMSG_DATA_off => SIZEOF_ptr == 8 ? '@16' : '',
@@ -316,11 +329,12 @@ BEGIN {
 
                 )
         }
-        $PACK{CMSG_ALIGN_size} = SIZEOF_size_t;
-        $PACK{SIZEOF_cmsghdr} //= 0;
-        $PACK{TMPL_cmsg_len} //= undef;
-        $PACK{CMSG_DATA_off} //= undef;
-        $PACK{TMPL_msghdr} //= undef;
+        $CONST{CMSG_ALIGN_size} = SIZEOF_size_t;
+        $CONST{SIZEOF_cmsghdr} //= 0;
+        $CONST{TMPL_cmsg_len} //= undef;
+        $CONST{CMSG_DATA_off} //= undef;
+        $CONST{TMPL_msghdr} //= undef;
+        $CONST{MSG_MORE} //= 0;
 }
 
 # SFD_CLOEXEC is arch-dependent, so IN_CLOEXEC may be, too
@@ -455,7 +469,7 @@ sub nodatacow_dir {
         if (open my $fh, '<', $_[0]) { nodatacow_fh($fh) }
 }
 
-use constant \%PACK;
+use constant \%CONST;
 sub CMSG_ALIGN ($) { ($_[0] + CMSG_ALIGN_size - 1) & ~(CMSG_ALIGN_size - 1) }
 use constant CMSG_ALIGN_SIZEOF_cmsghdr => CMSG_ALIGN(SIZEOF_cmsghdr);
 sub CMSG_SPACE ($) { CMSG_ALIGN($_[0]) + CMSG_ALIGN_SIZEOF_cmsghdr }
@@ -463,8 +477,9 @@ sub CMSG_LEN ($) { CMSG_ALIGN_SIZEOF_cmsghdr + $_[0] }
 use constant msg_controllen_max =>
         CMSG_SPACE(10 * SIZEOF_int) + SIZEOF_cmsghdr; # space for 10 FDs
 
-if (defined($SYS_sendmsg) && defined($SYS_recvmsg)) {
 no warnings 'once';
+
+if (defined($SYS_sendmsg) && defined($SYS_recvmsg)) {
 require PublicInbox::CmdIPC4;
 
 *send_cmd4 = sub ($$$$;$) {
@@ -527,6 +542,35 @@ require PublicInbox::CmdIPC4;
         }
         @ret;
 };
+
+*sendmsg_more = sub ($@) {
+        use bytes qw(length substr);
+        my $sock = shift;
+        my $iov = join('', map { pack 'P'.TMPL_size_t, $_, length } @_);
+        my $mh = pack(TMPL_msghdr,
+                        undef, 0, # msg_name, msg_namelen (unused)
+                        $iov, scalar(@_), # msg_iov, msg_iovlen
+                        undef, 0, # msg_control, msg_controllen (unused),
+                        0); # msg_flags (unused)
+        my $s;
+        do {
+                $s = syscall($SYS_sendmsg, fileno($sock), $mh, MSG_MORE);
+        } while ($s < 0 && $!{EINTR});
+        $s < 0 ? undef : $s;
+};
+}
+
+if (defined($SYS_writev)) {
+*writev = sub {
+        my $fh = shift;
+        use bytes qw(length substr);
+        my $iov = join('', map { pack 'P'.TMPL_size_t, $_, length } @_);
+        my $w;
+        do {
+                $w = syscall($SYS_writev, fileno($fh), $iov, scalar(@_));
+        } while ($w < 0 && $!{EINTR});
+        $w < 0 ? undef : $w;
+};
 }
 
 1;
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 3a67ab54..00e96aee 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -167,11 +167,8 @@ sub require_git ($;$) {
 sub require_git_http_backend (;$) {
         my ($nr) = @_;
         state $ok = do {
-                require PublicInbox::Git;
-                my $git = PublicInbox::Git::git_exe() or plan
-                        skip_all => 'nothing in public-inbox works w/o git';
                 my $rdr = { 1 => \my $out, 2 => \my $err };
-                xsys([$git, qw(http-backend)], undef, $rdr);
+                xsys([qw(git http-backend)], undef, $rdr);
                 $out =~ /^Status:/ism;
         };
         if (!$ok) {
@@ -274,15 +271,16 @@ sub require_mods {
 
 sub key2script ($) {
         my ($key) = @_;
-        return $key if ($key eq 'git' || index($key, '/') >= 0);
+        require PublicInbox::Git;
+        return PublicInbox::Git::git_exe() if $key eq 'git';
+        return $key if index($key, '/') >= 0;
         # n.b. we may have scripts which don't start with "public-inbox" in
         # the future:
         $key =~ s/\A([-\.])/public-inbox$1/;
         'blib/script/'.$key;
 }
 
-my @io_mode = ([ *STDIN{IO}, '+<&' ], [ *STDOUT{IO}, '+>&' ],
-                [ *STDERR{IO}, '+>&' ]);
+my @io_mode = ([ \*STDIN, '+<&' ], [ \*STDOUT, '+>&' ], [ \*STDERR, '+>&' ]);
 
 sub _prepare_redirects ($) {
         my ($fhref) = @_;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 44e1f2a8..9a90f939 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -74,9 +74,13 @@ sub msg_page {
         my ($id, $prev);
         my $next_arg = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
 
-        my $smsg = $ctx->{smsg} = $over->next_by_mid(@$next_arg) or
-                return; # undef == 404
-
+        my $smsg = $ctx->{smsg} = $over->next_by_mid(@$next_arg);
+        if (!$smsg && $ctx->{mid} =~ /\A\<(.+)\>\z/ and
+                        ($next_arg->[0] = $1) and
+                        ($over->next_by_mid(@$next_arg))) {
+                return PublicInbox::WWW::r301($ctx, undef, $next_arg->[0]);
+        }
+        $smsg or return; # undef=404
         # allow user to easily browse the range around this message if
         # they have ->over
         $ctx->{-t_max} = $smsg->{ts};
@@ -307,7 +311,8 @@ sub eml_entry {
                 " <a\nhref=\"${mhref}raw\">raw</a>" .
                 " <a\nhref=\"${mhref}#R\">reply</a>";
 
-        delete($ctx->{-qry}) and
+        # points to permalink
+        delete($ctx->{-qry_dfblob}) and
                 $rv .= qq[ <a\nhref="${mhref}#related">related</a>];
 
         my $hr;
@@ -432,6 +437,7 @@ sub walk_thread ($$$) {
 
 sub pre_thread  { # walk_thread callback
         my ($ctx, $level, $node, $idx) = @_;
+        # node->{mid} is deduplicated in PublicInbox::SearchThread::thread
         $ctx->{mapping}->{$node->{mid}} = [ '', $node, $idx, $level ];
         skel_dump($ctx, $level, $node);
 }
@@ -476,7 +482,7 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
                         print { $ctx->zfh } ghost_index_entry($ctx, $lvl, $smsg)
                 } else { # all done
                         print { $ctx->zfh } thread_adj_level($ctx, 0),
-                                                ${delete($ctx->{skel})};
+                                                @{delete($ctx->{skel})};
                         return;
                 }
         }
@@ -513,11 +519,13 @@ href="../../">newest</a>]
 EOF
         $skel .= "<b\nid=t>Thread overview:</b> ";
         $skel .= $nr == 1 ? '(only message)' : "$nr+ messages";
-        $skel .= " (download: <a\nhref=\"../t.mbox.gz\">mbox.gz</a>";
-        $skel .= " / follow: <a\nhref=\"../t.atom\">Atom feed</a>)\n";
-        $skel .= "-- links below jump to the message on this page --\n";
+        $skel .= <<EOM;
+ (download: <a\nhref="../t.mbox.gz">mbox.gz</a> follow: <a
+href=\"../t.atom\">Atom feed</a>
+-- links below jump to the message on this page --
+EOM
         $ctx->{cur_level} = 0;
-        $ctx->{skel} = \$skel;
+        $ctx->{skel} = [ $skel ];
         $ctx->{prev_attr} = '';
         $ctx->{prev_level} = 0;
         $ctx->{root_anchor} = 'm' . id_compress($mid, 1);
@@ -529,9 +537,9 @@ EOF
 
         # reduce hash lookups in pre_thread->skel_dump
         $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-        walk_thread($rootset, $ctx, \&pre_thread);
+        walk_thread($rootset, $ctx, \&pre_thread); # pushes to ctx->{skel}
 
-        $skel .= '</pre>';
+        push @{$ctx->{skel}}, '</pre>';
         return stream_thread($rootset, $ctx) unless $ctx->{flat};
 
         # flat display: lazy load the full message from smsg
@@ -553,8 +561,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
                 while (my $smsg = shift @{$ctx->{msgs}}) {
                         return $smsg if exists($smsg->{blob});
                 }
-                my $skel = delete($ctx->{skel}) or return; # all done
-                print { $ctx->zfh } $$skel;
+                print { $ctx->zfh } @{delete $ctx->{skel} // []};
                 undef;
         }
 }
@@ -698,6 +705,7 @@ href="d/">diff</a>)</pre><pre>];
                 }
         }
         my @subj = $eml->header('Subject');
+        $ctx->{subj_raw} = $subj[0];
         $hbuf .= "Subject: $_\n" for @subj;
         $title[0] = $subj[0] // '(no subject)';
         $hbuf .= "Date: $_\n" for $eml->header('Date');
@@ -778,13 +786,13 @@ sub thread_skel ($$$) {
         my $ibx = $ctx->{ibx};
         my ($nr, $msgs) = $ibx->over->get_thread($mid);
         my $parent = in_reply_to($hdr);
-        $$skel .= "\n<b>Thread overview: </b>";
+        $skel->[-1] .= "\n<b>Thread overview: </b>";
         if ($nr <= 1) {
                 if (defined $parent) {
-                        $$skel .= SKEL_EXPAND."\n ";
-                        $$skel .= ghost_parent('../', $parent) . "\n";
+                        $skel->[-1] .= SKEL_EXPAND."\n ";
+                        $skel->[-1] .= ghost_parent('../', $parent) . "\n";
                 } else {
-                        $$skel .= "<a\nid=r>[no followups]</a> ".
+                        $skel->[-1] .= "<a\nid=r>[no followups]</a> ".
                                         SKEL_EXPAND."\n";
                 }
                 $ctx->{next_msg} = undef;
@@ -792,8 +800,9 @@ sub thread_skel ($$$) {
                 return;
         }
 
-        $$skel .= $nr;
-        $$skel .= '+ messages / '.SKEL_EXPAND.qq!  <a\nhref="#b">top</a>\n!;
+        $skel->[-1] .= $nr;
+        $skel->[-1] .= '+ messages / '.SKEL_EXPAND.
+                        qq!  <a\nhref="#b">top</a>\n!;
 
         # nb: mutt only shows the first Subject in the index pane
         # when multiple Subject: headers are present, so we follow suit:
@@ -811,25 +820,38 @@ sub thread_skel ($$$) {
         $ctx->{parent_msg} = $parent;
 }
 
+sub dfqry_text ($$) {
+        my ($ctx, $subj) = @_;
+        my $qry_dfblob = delete $ctx->{-qry_dfblob} or return (undef);
+        my @bs = split /["\x{201c}\x{201d}]+/, $subj;
+        my $q = join ' ', (@bs ? ('(') : ()), map {
+                chop if length > 7; # include 1 abbrev "older" patches
+                "dfblob:$_";
+        } @$qry_dfblob;
+        local $Text::Wrap::columns = COLS;
+        local $Text::Wrap::huge = 'overflow';
+        $subj //= '';
+        $subj =~ s/\A\s*(?:amend|fixup|squash)\!\s*//; # --autosquash commands
+        # split on double-quotes for phrases
+        $q = wrap('', '', $q);
+        if (@bs) {
+                $q .= " )\n OR (";
+                $q .= qq[\nbs:"$_"] for @bs;
+                $q .= ' )';
+        }
+        my $rows = ($q =~ tr/\n/\n/) + 1;
+        ($rows, ascii_html($q));
+}
+
 # writes to zbuf
 sub html_footer {
         my ($ctx, $hdr) = @_;
         my $upfx = '../';
-        my (@related, $skel);
+        my (@related, @skel);
         my $foot = '<pre>';
-        my $qry = delete $ctx->{-qry};
-        if ($qry && $ctx->{ibx}->isrch) {
-                my $q = ''; # search for either ancestor or descendent patches
-                for (@{$qry->{dfpre}}, @{$qry->{dfpost}}) {
-                        chop if length > 7; # include 1 abbrev "older" patches
-                        $q .= "dfblob:$_ ";
-                }
-                chop $q; # omit trailing SP
-                local $Text::Wrap::columns = COLS;
-                local $Text::Wrap::huge = 'overflow';
-                $q = wrap('', '', $q);
-                my $rows = ($q =~ tr/\n/\n/) + 1;
-                $q = ascii_html($q);
+        my ($rows, $q) = dfqry_text $ctx,
+                        delete($ctx->{-qry_subj}) // $ctx->{subj_raw};
+        if ($rows && $ctx->{ibx}->isrch) {
                 $related[0] = <<EOM;
 <form id=related
 action=$upfx
@@ -847,12 +869,12 @@ EOM
                 my $t = ts2str($ctx->{-t_max});
                 my $t_fmt = fmt_ts($ctx->{-t_max});
                 my $fallback = @related ? "\t" : "<a id=related>\t</a>";
-                $skel = <<EOF;
+                $skel[0] = <<EOF;
 ${fallback}other threads:[<a
 href="$upfx?t=$t">~$t_fmt UTC</a>|<a
 href="$upfx">newest</a>]
 EOF
-                thread_skel(\$skel, $ctx, $hdr);
+                thread_skel(\@skel, $ctx, $hdr);
                 my ($next, $prev);
                 my $parent = '       ';
                 $next = $prev = '    ';
@@ -879,11 +901,11 @@ EOF
                 }
                 $foot .= "$next $prev$parent ";
         } else { # unindexed inboxes w/o over
-                $skel = qq( <a\nhref="$upfx">latest</a>);
+                $skel[0] = qq( <a\nhref="$upfx">latest</a>);
         }
-        # $skel may be big for big threads, don't append it to $foot
+        # @skel may be big for big threads, don't push to it
         print { $ctx->zfh } $foot, qq(<a\nhref="#R">reply</a>),
-                                $skel, '</pre>', @related,
+                                @skel, '</pre>', @related,
                                 msg_reply($ctx, $hdr);
 }
 
@@ -985,7 +1007,8 @@ sub skel_dump { # walk_thread callback
         my $mid = $smsg->{mid};
 
         if ($level == 0 && $ctx->{skel_dump_roots}++) {
-                $$skel .= delete($ctx->{sl_note}) || '';
+                my $note = delete $ctx->{sl_note};
+                push @$skel, $note if $note;
         }
 
         my $f = ascii_html(delete $smsg->{from_name});
@@ -1014,7 +1037,7 @@ sub skel_dump { # walk_thread callback
         if ($cur) {
                 if ($cur eq $mid) {
                         delete $ctx->{cur};
-                        $$skel .= "<b>$d<a\nid=r\nhref=\"#t\">".
+                        push @$skel, "<b>$d<a\nid=r\nhref=\"#t\">".
                                  "$attr [this message]</a></b>\n";
                         return 1;
                 } else {
@@ -1054,8 +1077,7 @@ sub skel_dump { # walk_thread callback
         } else {
                 $m = $ctx->{-upfx}.mid_href($mid).'/';
         }
-        $$skel .=  $d . "<a\nhref=\"$m\"$id>" . $end;
-        1;
+        push @$skel, qq($d<a\nhref="$m"$id>$end);
 }
 
 sub _skel_ghost {
@@ -1078,8 +1100,7 @@ sub _skel_ghost {
         } else {
                 $d .= qq{&lt;<a\nhref="$href">$html</a>&gt;\n};
         }
-        ${$ctx->{skel}} .= $d;
-        1;
+        push @{$ctx->{skel}}, $d;
 }
 
 # note: we favor Date: here because git-send-email increments it
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index d078c5f9..7a6d9a2b 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -135,15 +135,14 @@ sub diff_header ($$$) {
         # no need to capture oid_a and oid_b on add/delete,
         # we just linkify OIDs directly via s///e in conditional
         if ($$x =~ s/$NULL_TO_BLOB/$1 . oid($dctx, $spfx, $2)/e) {
-                push @{$ctx->{-qry}->{dfpost}}, $2;
+                push @{$ctx->{-qry_dfblob}}, $2;
         } elsif ($$x =~ s/$BLOB_TO_NULL/'index '.oid($dctx, $spfx, $1).$2/e) {
-                push @{$ctx->{-qry}->{dfpre}}, $1;
+                push @{$ctx->{-qry_dfblob}}, $1;
         } elsif ($$x =~ $BLOB_TO_BLOB) {
                 # modification-only, not add/delete:
                 # linkify hunk headers later using oid_a and oid_b
                 @$dctx{qw(oid_a oid_b)} = ($1, $2);
-                push @{$ctx->{-qry}->{dfpre}}, $1;
-                push @{$ctx->{-qry}->{dfpost}}, $2;
+                push @{$ctx->{-qry_dfblob}}, $1, $2;
         } else {
                 warn "BUG? <$$x> had no ^index line";
         }
@@ -183,9 +182,10 @@ sub diff_before_or_after ($$) {
 sub flush_diff ($$) {
         my ($ctx, $cur) = @_;
 
+        my ($subj) = ($$cur =~ /^Subject:\s*\[[^\]]+\]\s*(.+?)$/sm);
         my @top = split($EXTRACT_DIFFS, $$cur);
         undef $$cur; # free memory
-
+        $ctx->{-qry_subj} = $subj if $subj;
         my $lnk = $ctx->{-linkify};
         my $dctx; # {}, keys: Q, oid_a, oid_b
         my $zfh = $ctx->zfh;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 83a83698..b69214bc 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -251,7 +251,7 @@ href="$ibx_url?t=$t"
 title="list contemporary emails">$2</a>)
                 !e;
 
-        $ctx->{-title_html} = $s = $ctx->{-linkify}->to_html($s);
+        my $title_html = $ctx->{-title_html} = $ctx->{-linkify}->to_html($s);
         my ($P, $p, $pt) = delete @$ctx{qw(-cmt_P -cmt_p -cmt_pt)};
         $_ = qq(<a href="$upfx$_/s/">).shift(@$p).'</a> '.shift(@$pt) for @$P;
         if (@$P == 1) {
@@ -273,7 +273,7 @@ href="$f.patch">patch</a>)\n   <a href=#parent>parent</a> $P->[0]};
    author $au
 committer $co
 
-<b>$s</b>
+<b>$title_html</b>
 EOM
         print $zfh "\n", $ctx->{-linkify}->to_html($bdy) if length($bdy);
         undef $bdy; # free memory
@@ -291,20 +291,8 @@ EOM
                 # TODO: should there be another textarea which attempts to
                 # search for the exact email which was applied to make this
                 # commit?
-                if (my $qry = delete $ctx->{-qry}) {
-                        my $q = '';
-                        for (@{$qry->{dfpost}}, @{$qry->{dfpre}}) {
-                                # keep blobs as short as reasonable, emails
-                                # are going to be older than what's in git
-                                substr($_, 7, 64, '');
-                                $q .= "dfblob:$_ ";
-                        }
-                        chop $q; # no trailing SP
-                        local $Text::Wrap::columns = PublicInbox::View::COLS;
-                        local $Text::Wrap::huge = 'overflow';
-                        $q = wrap('', '', $q);
-                        my $rows = ($q =~ tr/\n/\n/) + 1;
-                        $q = ascii_html($q);
+                my ($rows, $q) = PublicInbox::View::dfqry_text $ctx, $s;
+                if ($rows) {
                         my $ibx_url = ibx_url_for($ctx);
                         my $alt;
                         if (defined $ibx_url) {
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index a5e2dc4a..5e086fee 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -24,8 +24,9 @@ use PublicInbox::OnDestroy;
 use URI::Escape qw(uri_escape_utf8);
 use File::Spec;
 use autodie qw(fcntl open);
+use PublicInbox::Git qw(git_exe);
 
-my @EACH_REF = (qw(git for-each-ref --sort=-creatordate),
+my @EACH_REF = (git_exe, qw(for-each-ref --sort=-creatordate),
                 "--format=%(HEAD)%00".join('%00', map { "%($_)" }
                 qw(objectname refname:short subject creatordate:short)));
 my $HEADS_CMD = <<'';
@@ -87,7 +88,7 @@ sub new {
         my @s = stat(STDIN) or die "stat(STDIN): $!";
         if ("@l[0, 1]" eq "@s[0, 1]") {
                 my $f = fcntl(STDIN, F_GETFL, 0);
-                $self->{log_fh} = *STDIN{IO} if $f & O_RDWR;
+                $self->{log_fh} = \*STDIN if $f & O_RDWR;
         }
         $self;
 }
@@ -186,7 +187,7 @@ EOM
         print $zfh "...\n" if $last;
 
         # README
-        my ($bref, $oid, $ref_path) = @{delete $ctx->{qx_res}->{readme}};
+        my ($bref, $oid, $ref_path) = @{delete $ctx->{qx_res}->{readme} // []};
         if ($bref) {
                 my $l = PublicInbox::Linkify->new;
                 $$bref =~ s/\s*\z//sm;
@@ -249,7 +250,7 @@ sub summary ($$) {
         my $qsp_err = \($ctx->{-qsp_err} = '');
         my %opt = (quiet => 1, 2 => $ctx->{wcr}->{log_fh});
         my %env = (GIT_DIR => $ctx->{git}->{git_dir});
-        my @log = (qw(git log), "-$nl", '--pretty=format:%d %H %h %cs %s');
+        my @log = (git_exe, 'log', "-$nl", '--pretty=format:%d %H %h %cs %s');
         push(@log, $tip) if defined $tip;
 
         # limit scope for MockHTTP test (t/solver_git.t)
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index 5e23005e..8279591a 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -37,7 +37,7 @@ sub get_text {
         }
         my $env = $ctx->{env};
         if ($raw) {
-                my $h = delete $ctx->{-res_hdr};
+                my $h = delete $ctx->{-res_hdr} // [];
                 $txt = gzf_maybe($h, $env)->zflush($txt) if $code == 200;
                 push @$h, 'Content-Type', 'text/plain',
                         'Content-Length', length($txt);
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index c9957f64..ba41b5d2 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -18,7 +18,7 @@ use POSIX qw(:signal_h);
 use Fcntl qw(LOCK_UN LOCK_EX);
 use Carp qw(croak);
 my $X = \%PublicInbox::Search::X;
-our (%SRCH, %WORKERS, $nworker, $workerset, $in);
+our (%SRCH, %WORKERS, $nworker, $workerset, $in, $SHARD_NFD, $MY_FD_MAX);
 our $stderr = \*STDERR;
 
 sub cmd_test_inspect {
@@ -190,23 +190,43 @@ sub dispatch {
         $GLP->getoptionsfromarray(\@argv, $req, @PublicInbox::Search::XH_SPEC)
                 or return;
         my $dirs = delete $req->{d} or die 'no -d args';
-        my $key = join("\0", @$dirs);
+        my $key = "-d\0".join("\0-d\0", @$dirs);
+        $key .= "\0".join("\0", map { ('-Q', $_) } @{$req->{Q}}) if $req->{Q};
         my $new;
-        $req->{srch} = $SRCH{$key} //= do {
+        $req->{srch} = $SRCH{$key} // do {
                 $new = { qp_flags => $PublicInbox::Search::QP_FLAGS };
+                my $nfd = scalar(@$dirs) * PublicInbox::Search::SHARD_COST;
+                $SHARD_NFD += $nfd;
+                if ($SHARD_NFD > $MY_FD_MAX) {
+                        $SHARD_NFD = $nfd;
+                        %SRCH = ();
+                }
                 my $first = shift @$dirs;
-                my $slow_phrase = -f "$first/iamchert";
-                $new->{xdb} = $X->{Database}->new($first);
-                for (@$dirs) {
-                        $slow_phrase ||= -f "$_/iamchert";
-                        $new->{xdb}->add_database($X->{Database}->new($_));
+                for my $retried (0, 1) {
+                        my $slow_phrase = -f "$first/iamchert";
+                        eval {
+                                $new->{xdb} = $X->{Database}->new($first);
+                                for (@$dirs) {
+                                        $slow_phrase ||= -f "$_/iamchert";
+                                        $new->{xdb}->add_database(
+                                                        $X->{Database}->new($_))
+                                }
+                        };
+                        last unless $@;
+                        if ($retried) {
+                                die "E: $@\n";
+                        } else { # may be EMFILE/ENFILE/ENOMEM....
+                                warn "W: $@, retrying...\n";
+                                %SRCH = ();
+                                $SHARD_NFD = $nfd;
+                        }
+                        $slow_phrase or $new->{qp_flags}
+                                |= PublicInbox::Search::FLAG_PHRASE();
                 }
-                $slow_phrase or
-                        $new->{qp_flags} |= PublicInbox::Search::FLAG_PHRASE();
                 bless $new, $req->{c} ? 'PublicInbox::CodeSearch' :
                                         'PublicInbox::Search';
                 $new->{qp} = $new->qparse_new;
-                $new;
+                $SRCH{$key} = $new;
         };
         $req->{srch}->{xdb}->reopen unless $new;
         $req->{Q} && !$req->{srch}->{qp_extra_done} and
@@ -304,7 +324,7 @@ sub start (@) {
         my $c = getsockopt(local $in = \*STDIN, SOL_SOCKET, SO_TYPE);
         unpack('i', $c) == SOCK_SEQPACKET or die 'stdin is not SOCK_SEQPACKET';
 
-        local (%SRCH, %WORKERS);
+        local (%SRCH, %WORKERS, $SHARD_NFD, $MY_FD_MAX);
         PublicInbox::Search::load_xapian();
         $GLP->getoptionsfromarray(\@argv, my $opt = { j => 1 }, 'j=i') or
                 die 'bad args';
@@ -313,6 +333,10 @@ sub start (@) {
         for (@PublicInbox::DS::UNBLOCKABLE, POSIX::SIGUSR1) {
                 $workerset->delset($_) or die "delset($_): $!";
         }
+        $MY_FD_MAX = PublicInbox::Search::ulimit_n //
+                die "E: unable to get RLIMIT_NOFILE: $!";
+        warn "W: RLIMIT_NOFILE=$MY_FD_MAX too low\n" if $MY_FD_MAX < 72;
+        $MY_FD_MAX -= 64;
 
         local $nworker = $opt->{j};
         return recv_loop() if $nworker == 0;
diff --git a/lib/PublicInbox/XapHelperCxx.pm b/lib/PublicInbox/XapHelperCxx.pm
index 74852ad1..922bd583 100644
--- a/lib/PublicInbox/XapHelperCxx.pm
+++ b/lib/PublicInbox/XapHelperCxx.pm
@@ -34,6 +34,7 @@ my $ldflags = '-Wl,-O1';
 $ldflags .= ' -Wl,--compress-debug-sections=zlib' if $^O ne 'openbsd';
 my $xflags = ($ENV{CXXFLAGS} // '-Wall -ggdb3 -pipe') . ' ' .
         ' -DTHREADID=' . PublicInbox::Search::THREADID .
+        ' -DSHARD_COST=' . PublicInbox::Search::SHARD_COST .
         ' -DXH_SPEC="'.join('',
                 map { s/=.*/:/; $_ } @PublicInbox::Search::XH_SPEC) . '" ' .
         ($ENV{LDFLAGS} // $ldflags);
diff --git a/lib/PublicInbox/khashl.h b/lib/PublicInbox/khashl.h
new file mode 100644
index 00000000..170b81ff
--- /dev/null
+++ b/lib/PublicInbox/khashl.h
@@ -0,0 +1,502 @@
+/* The MIT License
+
+   Copyright (c) 2019-2023 by Attractive Chaos <attractor@live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+#ifndef __AC_KHASHL_H
+#define __AC_KHASHL_H
+
+#define AC_VERSION_KHASHL_H "0.2"
+
+typedef uint32_t khint32_t;
+typedef uint64_t khint64_t;
+
+typedef khint32_t khint_t;
+typedef khint_t khiter_t;
+
+#define kh_inline inline
+#define KH_LOCAL static kh_inline
+
+#ifndef kcalloc
+#define kcalloc(N,Z) xcalloc(N,Z)
+#endif
+#ifndef kfree
+#define kfree(P) free(P)
+#endif
+
+/****************************
+ * Simple private functions *
+ ****************************/
+
+#define __kh_used(flag, i)       (flag[i>>5] >> (i&0x1fU) & 1U)
+#define __kh_set_used(flag, i)   (flag[i>>5] |= 1U<<(i&0x1fU))
+#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU)))
+
+#define __kh_fsize(m) ((m) < 32? 1 : (m)>>5)
+
+static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); }
+
+/*******************
+ * Hash table base *
+ *******************/
+
+#define __KHASHL_TYPE(HType, khkey_t) \
+        typedef struct HType { \
+                khint_t bits, count; \
+                khint32_t *used; \
+                khkey_t *keys; \
+        } HType;
+
+#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \
+        extern HType *prefix##_init(void); \
+        extern void prefix##_destroy(HType *h); \
+        extern void prefix##_clear(HType *h); \
+        extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \
+        extern void prefix##_resize(HType *h, khint_t new_n_buckets); \
+        extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \
+        extern void prefix##_del(HType *h, khint_t k);
+
+#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \
+        SCOPE HType *prefix##_init(void) { \
+                return (HType*)kcalloc(1, sizeof(HType)); \
+        } \
+        SCOPE void prefix##_release(HType *h) { \
+                kfree((void *)h->keys); kfree(h->used); \
+        } \
+        SCOPE void prefix##_destroy(HType *h) { \
+                if (!h) return; \
+                prefix##_release(h); \
+                kfree(h); \
+        } \
+        SCOPE void prefix##_clear(HType *h) { \
+                if (h && h->used) { \
+                        khint_t n_buckets = (khint_t)1U << h->bits; \
+                        memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \
+                        h->count = 0; \
+                } \
+        }
+
+#define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+        SCOPE khint_t prefix##_getp_core(const HType *h, const khkey_t *key, khint_t hash) { \
+                khint_t i, last, n_buckets, mask; \
+                if (!h->keys) return 0; \
+                n_buckets = (khint_t)1U << h->bits; \
+                mask = n_buckets - 1U; \
+                i = last = __kh_h2b(hash, h->bits); \
+                while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \
+                        i = (i + 1U) & mask; \
+                        if (i == last) return n_buckets; \
+                } \
+                return !__kh_used(h->used, i)? n_buckets : i; \
+        } \
+        SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { return prefix##_getp_core(h, key, __hash_fn(*key)); } \
+        SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp_core(h, &key, __hash_fn(key)); }
+
+#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+        SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { \
+                khint32_t *new_used = NULL; \
+                khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \
+                while ((x >>= 1) != 0) ++j; \
+                if (new_n_buckets & (new_n_buckets - 1)) ++j; \
+                new_bits = j > 2? j : 2; \
+                new_n_buckets = (khint_t)1U << new_bits; \
+                if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return; /* noop, requested size is too small */ \
+                new_used = (khint32_t*)kcalloc(__kh_fsize(new_n_buckets), sizeof(khint32_t)); \
+                n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \
+                if (n_buckets < new_n_buckets) { /* expand */ \
+                        h->keys = (khkey_t *)xreallocarray(h->keys, \
+                                        new_n_buckets, sizeof(khkey_t)); \
+                } /* otherwise shrink */ \
+                new_mask = new_n_buckets - 1; \
+                for (j = 0; j != n_buckets; ++j) { \
+                        khkey_t key; \
+                        if (!__kh_used(h->used, j)) continue; \
+                        key = h->keys[j]; \
+                        __kh_set_unused(h->used, j); \
+                        while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
+                                khint_t i; \
+                                i = __kh_h2b(__hash_fn(key), new_bits); \
+                                while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \
+                                __kh_set_used(new_used, i); \
+                                if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \
+                                        { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
+                                        __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \
+                                } else { /* write the element and jump out of the loop */ \
+                                        h->keys[i] = key; \
+                                        break; \
+                                } \
+                        } \
+                } \
+                if (n_buckets > new_n_buckets) /* shrink the hash table */ \
+                        h->keys = (khkey_t *)xreallocarray(h->keys, \
+                                        new_n_buckets, sizeof(khkey_t)); \
+                kfree(h->used); /* free the working space */ \
+                h->used = new_used, h->bits = new_bits; \
+        }
+
+#define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+        SCOPE khint_t prefix##_putp_core(HType *h, const khkey_t *key, khint_t hash, int *absent) { \
+                khint_t n_buckets, i, last, mask; \
+                n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \
+                *absent = -1; \
+                if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \
+                        prefix##_resize(h, n_buckets + 1U); \
+                        n_buckets = (khint_t)1U<<h->bits; \
+                } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
+                mask = n_buckets - 1; \
+                i = last = __kh_h2b(hash, h->bits); \
+                while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \
+                        i = (i + 1U) & mask; \
+                        if (i == last) break; \
+                } \
+                if (!__kh_used(h->used, i)) { /* not present at all */ \
+                        h->keys[i] = *key; \
+                        __kh_set_used(h->used, i); \
+                        ++h->count; \
+                        *absent = 1; \
+                } else *absent = 0; /* Don't touch h->keys[i] if present */ \
+                return i; \
+        } \
+        SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { return prefix##_putp_core(h, key, __hash_fn(*key), absent); } \
+        SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp_core(h, &key, __hash_fn(key), absent); }
+
+#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \
+        SCOPE int prefix##_del(HType *h, khint_t i) { \
+                khint_t j = i, k, mask, n_buckets; \
+                if (!h->keys) return 0; \
+                n_buckets = (khint_t)1U<<h->bits; \
+                mask = n_buckets - 1U; \
+                while (1) { \
+                        j = (j + 1U) & mask; \
+                        if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \
+                        k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \
+                        if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \
+                                h->keys[i] = h->keys[j], i = j; \
+                } \
+                __kh_set_unused(h->used, i); \
+                --h->count; \
+                return 1; \
+        }
+
+#define KHASHL_DECLARE(HType, prefix, khkey_t) \
+        __KHASHL_TYPE(HType, khkey_t) \
+        __KHASHL_PROTOTYPES(HType, prefix, khkey_t)
+
+#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+        __KHASHL_TYPE(HType, khkey_t) \
+        __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \
+        __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+        __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+        __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+        __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn)
+
+#define KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+        KHASHL_INIT(KH_LOCAL, HType##_sub, prefix##_sub, khkey_t, __hash_fn, __hash_eq) \
+        typedef struct HType { \
+                khint64_t count:54, bits:8; \
+                HType##_sub *sub; \
+        } HType; \
+        SCOPE HType *prefix##_init_sub(HType *g, size_t bits) { \
+                g->bits = bits; \
+                g->sub = (HType##_sub*)kcalloc(1U<<bits, sizeof(*g->sub)); \
+                return g; \
+        } \
+        SCOPE HType *prefix##_init(void) { \
+                HType *g; \
+                g = (HType*)kcalloc(1, sizeof(*g)); \
+                return prefix##_init_sub(g, 0); /* unsure about default */ \
+        } \
+        SCOPE void prefix##_release(HType *g) { \
+                int t; \
+                for (t = 0; t < 1<<g->bits; ++t) \
+                        prefix##_sub_release(&g->sub[t]); \
+                kfree(g->sub); \
+        } \
+        SCOPE void prefix##_destroy(HType *g) { \
+                if (!g) return; \
+                prefix##_release(g); \
+                kfree(g); \
+        } \
+        SCOPE void prefix##_clear(HType *g) { \
+                int t; \
+                if (!g) return; \
+                for (t = 0; t < 1<<g->bits; ++t) \
+                        prefix##_sub_clear(&g->sub[t]); \
+        } \
+        SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \
+                khint_t hash, low, ret; \
+                kh_ensitr_t r; \
+                HType##_sub *h; \
+                hash = __hash_fn(*key); \
+                low = hash & ((1U<<g->bits) - 1); \
+                h = &g->sub[low]; \
+                ret = prefix##_sub_getp_core(h, key, hash); \
+                if (ret >= kh_end(h)) r.sub = low, r.pos = (khint_t)-1; \
+                else r.sub = low, r.pos = ret; \
+                return r; \
+        } \
+        SCOPE kh_ensitr_t prefix##_get(const HType *g, const khkey_t key) { return prefix##_getp(g, &key); } \
+        SCOPE kh_ensitr_t prefix##_putp(HType *g, const khkey_t *key, int *absent) { \
+                khint_t hash, low, ret; \
+                kh_ensitr_t r; \
+                HType##_sub *h; \
+                hash = __hash_fn(*key); \
+                low = hash & ((1U<<g->bits) - 1); \
+                h = &g->sub[low]; \
+                ret = prefix##_sub_putp_core(h, key, hash, absent); \
+                if (*absent) ++g->count; \
+                if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \
+                else r.sub = low, r.pos = ret; \
+                return r; \
+        } \
+        SCOPE kh_ensitr_t prefix##_put(HType *g, const khkey_t key, int *absent) { return prefix##_putp(g, &key, absent); } \
+        SCOPE int prefix##_del(HType *g, kh_ensitr_t itr) { \
+                HType##_sub *h = &g->sub[itr.sub]; \
+                int ret; \
+                ret = prefix##_sub_del(h, itr.pos); \
+                if (ret) --g->count; \
+                return ret; \
+        }
+
+/*****************************
+ * More convenient interface *
+ *****************************/
+
+#define __kh_packed /* noop, we use -Werror=address-of-packed-member */
+#define __kh_cached_hash(x) ((x).hash)
+
+#define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+        typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \
+        static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \
+        static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \
+        KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \
+        SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \
+        SCOPE void prefix##_release(HType *h) { prefix##_s_release(h); } \
+        SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \
+        SCOPE void prefix##_clear(HType *h) { prefix##_s_clear(h); } \
+        SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \
+        SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \
+        SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \
+        SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } \
+
+#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+        typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \
+        static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \
+        static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \
+        KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \
+        SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \
+        SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \
+        SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \
+        SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \
+        SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_m_resize(h, new_n_buckets); } \
+        SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \
+        SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \
+        SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \
+
+#define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+        typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \
+        static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \
+        KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \
+        SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \
+        SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \
+        SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \
+        SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \
+        SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); }
+
+#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+        typedef struct { khkey_t key; kh_val_t val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \
+        static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \
+        KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \
+        SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \
+        SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \
+        SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \
+        SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \
+        SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); }
+
+#define KHASHE_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+        typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \
+        static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \
+        static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \
+        KHASHE_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \
+        SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \
+        SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \
+        SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \
+        SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \
+        SCOPE void prefix##_resize(HType *h, khint_t ignore) { /* noop */ } \
+        SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \
+        SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_m_del(h, k); } \
+        SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \
+
+/**************************
+ * Public macro functions *
+ **************************/
+
+#define kh_bucket(h, x) ((h)->keys[x])
+
+/*! @function
+  @abstract     Get the number of elements in the hash table
+  @param  h     Pointer to the hash table
+  @return       Number of elements in the hash table [khint_t]
+ */
+#define kh_size(h) ((h)->count)
+
+#define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U)
+
+/*! @function
+  @abstract     Get the end iterator
+  @param  h     Pointer to the hash table
+  @return       The end iterator [khint_t]
+ */
+#define kh_end(h) kh_capacity(h)
+
+/*! @function
+  @abstract     Get key given an iterator
+  @param  h     Pointer to the hash table
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Key [type of keys]
+ */
+#define kh_key(h, x) ((h)->keys[x].key)
+
+/*! @function
+  @abstract     Get value given an iterator
+  @param  h     Pointer to the hash table
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Value [type of values]
+  @discussion   For hash sets, calling this results in segfault.
+ */
+#define kh_val(h, x) ((h)->keys[x].val)
+
+/*! @function
+  @abstract     Alias of kh_val()
+ */
+#define kh_value(h, x) kh_val(h, x)
+
+/*! @function
+  @abstract     Test whether a bucket contains data.
+  @param  h     Pointer to the hash table
+  @param  x     Iterator to the bucket [khint_t]
+  @return       1 if containing data; 0 otherwise [int]
+ */
+#define kh_exist(h, x) __kh_used((h)->used, (x))
+
+#define kh_ens_key(g, x) kh_key(&(g)->sub[(x).sub], (x).pos)
+#define kh_ens_val(g, x) kh_val(&(g)->sub[(x).sub], (x).pos)
+#define kh_ens_exist(g, x) kh_exist(&(g)->sub[(x).sub], (x).pos)
+#define kh_ens_is_end(x) ((x).pos == (khint_t)-1)
+#define kh_ens_size(g) ((g)->count)
+
+/**************************************
+ * Common hash and equality functions *
+ **************************************/
+
+#define kh_eq_generic(a, b) ((a) == (b))
+#define kh_eq_str(a, b) (strcmp((a), (b)) == 0)
+#define kh_hash_dummy(x) ((khint_t)(x))
+
+static kh_inline khint_t kh_hash_uint32(khint_t key) {
+        key += ~(key << 15);
+        key ^=  (key >> 10);
+        key +=  (key << 3);
+        key ^=  (key >> 6);
+        key += ~(key << 11);
+        key ^=  (key >> 16);
+        return key;
+}
+
+static kh_inline khint_t kh_hash_uint64(khint64_t key) {
+        key = ~key + (key << 21);
+        key = key ^ key >> 24;
+        key = (key + (key << 3)) + (key << 8);
+        key = key ^ key >> 14;
+        key = (key + (key << 2)) + (key << 4);
+        key = key ^ key >> 28;
+        key = key + (key << 31);
+        return (khint_t)key;
+}
+
+#define KH_FNV_SEED 11
+
+static kh_inline khint_t kh_hash_str(const char *s) { /* FNV1a */
+        khint_t h = KH_FNV_SEED ^ 2166136261U;
+        const unsigned char *t = (const unsigned char*)s;
+        for (; *t; ++t)
+                h ^= *t, h *= 16777619;
+        return h;
+}
+
+static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) {
+        khint_t h = KH_FNV_SEED ^ 2166136261U;
+        int i;
+        for (i = 0; i < len; ++i)
+                h ^= s[i], h *= 16777619;
+        return h;
+}
+
+/*! @function
+  @abstract     Get the start iterator
+  @param  h     Pointer to the hash table
+  @return       The start iterator [khint_t]
+ */
+#define kh_begin(h) (khint_t)(0)
+
+/*! @function
+  @abstract     Iterate over the entries in the hash table
+  @param  h     Pointer to the hash table
+  @param  kvar  Variable to which key will be assigned
+  @param  vvar  Variable to which value will be assigned
+  @param  code  Block of code to execute
+ */
+#define kh_foreach(h, kvar, vvar, code) { khint_t __i;                \
+        for (__i = kh_begin(h); __i != kh_end(h); ++__i) {        \
+                if (!kh_exist(h,__i)) continue;                        \
+                (kvar) = kh_key(h,__i);                                \
+                (vvar) = kh_val(h,__i);                                \
+                code;                                                \
+        } }
+
+#define kh_ens_foreach(g, kvar, vvar, code) do { \
+        size_t t; \
+        for (t = 0; t < 1<<g->bits; ++t) \
+                kh_foreach(&g->sub[t], kvar, vvar, code); \
+} while (0)
+
+#define kh_ens_foreach_value(g, vvar, code) do { \
+        size_t t; \
+        for (t = 0; t < 1<<g->bits; ++t) \
+                kh_foreach_value(&g->sub[t], vvar, code); \
+} while (0)
+
+/*! @function
+  @abstract     Iterate over the values in the hash table
+  @param  h     Pointer to the hash table
+  @param  vvar  Variable to which value will be assigned
+  @param  code  Block of code to execute
+ */
+#define kh_foreach_value(h, vvar, code) { khint_t __i;                \
+        for (__i = kh_begin(h); __i != kh_end(h); ++__i) {        \
+                if (!kh_exist(h,__i)) continue;                        \
+                (vvar) = kh_val(h,__i);                                \
+                code;                                                \
+        } }
+
+#endif /* __AC_KHASHL_H */
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index a30a8768..51ab48bf 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -7,7 +7,7 @@
  * this is not linked to Perl in any way.
  * C (not C++) is used as much as possible to lower the contribution
  * barrier for hackers who mainly know C (this includes the maintainer).
- * Yes, that means we use C stdlib stuff like hsearch and open_memstream
+ * Yes, that means we use C stdlib stuff like open_memstream
  * instead their equivalents in the C++ stdlib :P
  * Everything here is an unstable internal API of public-inbox and
  * NOT intended for ordinary users; only public-inbox hackers
@@ -15,6 +15,9 @@
 #ifndef _ALL_SOURCE
 #        define _ALL_SOURCE
 #endif
+#ifndef _GNU_SOURCE
+#        define _GNU_SOURCE
+#endif
 #if defined(__NetBSD__) && !defined(_OPENBSD_SOURCE) // for reallocarray(3)
 #        define _OPENBSD_SOURCE
 #endif
@@ -34,7 +37,6 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
-#include <search.h>
 #include <signal.h>
 #include <stddef.h>
 #include <stdint.h>
@@ -83,6 +85,62 @@
 #define ABORT(...) do { warnx(__VA_ARGS__); abort(); } while (0)
 #define EABORT(...) do { warn(__VA_ARGS__); abort(); } while (0)
 
+static void *xcalloc(size_t nmemb, size_t size)
+{
+        void *ret = calloc(nmemb, size);
+        if (!ret) EABORT("calloc(%zu, %zu)", nmemb, size);
+        return ret;
+}
+
+#if defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
+                MY_VER(__GLIBC__, __GLIBC_MINOR__, 0) >= MY_VER(2, 28, 0)
+#        define HAVE_REALLOCARRAY 1
+#elif defined(__OpenBSD__) || defined(__DragonFly__) || \
+                defined(__FreeBSD__) || defined(__NetBSD__)
+#        define HAVE_REALLOCARRAY 1
+#endif
+
+static void *xreallocarray(void *ptr, size_t nmemb, size_t size)
+{
+#ifdef HAVE_REALLOCARRAY
+        void *ret = reallocarray(ptr, nmemb, size);
+#else // can't rely on __builtin_mul_overflow in gcc 4.x :<
+        void *ret = NULL;
+        if (nmemb && size > SIZE_MAX / nmemb)
+                errno = ENOMEM;
+        else
+                ret = realloc(ptr, nmemb * size);
+#endif
+        if (!ret) EABORT("reallocarray(..., %zu, %zu)", nmemb, size);
+        return ret;
+}
+
+#include "khashl.h"
+
+struct srch {
+        int ckey_len; // int for comparisons
+        unsigned qp_flags;
+        bool qp_extra_done;
+        Xapian::Database *db;
+        Xapian::QueryParser *qp;
+        unsigned char ckey[]; // $shard_path0\0$shard_path1\0...
+};
+
+static khint_t srch_hash(const struct srch *srch)
+{
+        return kh_hash_bytes(srch->ckey_len, srch->ckey);
+}
+
+static int srch_eq(const struct srch *a, const struct srch *b)
+{
+        return a->ckey_len == b->ckey_len ?
+                !memcmp(a->ckey, b->ckey, (size_t)a->ckey_len) : 0;
+}
+
+KHASHL_CSET_INIT(KH_LOCAL, srch_set, srch_set, struct srch *,
+                srch_hash, srch_eq)
+static srch_set *srch_cache;
+static long my_fd_max, shard_nfd;
 // sock_fd is modified in signal handler, yes, it's SOCK_SEQPACKET
 static volatile int sock_fd = STDIN_FILENO;
 static sigset_t fullset, workerset;
@@ -91,7 +149,6 @@ static bool alive = true;
 static FILE *orig_err = stderr;
 #endif
 static int orig_err_fd = -1;
-static void *srch_tree; // tsearch + tdelete + twalk
 static pid_t *worker_pids; // nr => pid
 #define WORKER_MAX USHRT_MAX
 static unsigned long nworker, nworker_hwm;
@@ -111,15 +168,6 @@ enum exc_iter {
         ITER_ABORT
 };
 
-struct srch {
-        int paths_len; // int for comparisons
-        unsigned qp_flags;
-        bool qp_extra_done;
-        Xapian::Database *db;
-        Xapian::QueryParser *qp;
-        char paths[]; // $shard_path0\0$shard_path1\0...
-};
-
 #define MY_ARG_MAX 256
 typedef bool (*cmd)(struct req *);
 
@@ -128,6 +176,7 @@ struct req { // argv and pfxv point into global rbuf
         char *argv[MY_ARG_MAX];
         char *pfxv[MY_ARG_MAX]; // -A <prefix>
         char *qpfxv[MY_ARG_MAX]; // -Q <user_prefix>[:=]<INTERNAL_PREFIX>
+        char *dirv[MY_ARG_MAX]; // -d /path/to/XDB(shard)
         size_t *lenv; // -A <prefix>LENGTH
         struct srch *srch;
         char *Pgit_dir;
@@ -139,9 +188,7 @@ struct req { // argv and pfxv point into global rbuf
         unsigned long timeout_sec;
         size_t nr_out;
         long sort_col; // value column, negative means BoolWeight
-        int argc;
-        int pfxc;
-        int qpfxc;
+        int argc, pfxc, qpfxc, dirc;
         FILE *fp[2]; // [0] response pipe or sock, [1] status/errors (optional)
         bool has_input; // fp[0] is bidirectional
         bool collapse_threads;
@@ -375,25 +422,6 @@ static size_t off2size(off_t n)
         return (size_t)n;
 }
 
-static char *hsearch_enter_key(char *s)
-{
-#if defined(__OpenBSD__) || defined(__DragonFly__)
-        // hdestroy frees each key on some platforms,
-        // so give it something to free:
-        char *ret = strdup(s);
-        if (!ret) err(EXIT_FAILURE, "strdup");
-        return ret;
-// AFAIK there's no way to detect musl, assume non-glibc Linux is musl:
-#elif defined(__GLIBC__) || defined(__linux__) || \
-        defined(__FreeBSD__) || defined(__NetBSD__)
-        // do nothing on these platforms
-#else
-#warning untested platform detected, unsure if hdestroy(3) frees keys
-#warning contact us at meta@public-inbox.org if you get segfaults
-#endif
-        return s;
-}
-
 // for test usage only, we need to ensure the compiler supports
 // __cleanup__ when exceptions are thrown
 struct inspect { struct req *req; };
@@ -512,15 +540,6 @@ again:
         return false;
 }
 
-static int srch_cmp(const void *pa, const void *pb) // for tfind|tsearch
-{
-        const struct srch *a = (const struct srch *)pa;
-        const struct srch *b = (const struct srch *)pb;
-        int diff = a->paths_len - b->paths_len;
-
-        return diff ? diff : memcmp(a->paths, b->paths, (size_t)a->paths_len);
-}
-
 static bool is_chert(const char *dir)
 {
         char iamchert[PATH_MAX];
@@ -534,49 +553,85 @@ static bool is_chert(const char *dir)
         return false;
 }
 
-static bool srch_init(struct req *req)
+static void srch_free(struct srch *srch)
+{
+        delete srch->qp;
+        delete srch->db;
+        free(srch);
+}
+
+static void srch_cache_renew(struct srch *keep)
+{
+        khint_t k;
+
+        // can't delete while iterating, so just free each + clear
+        for (k = kh_begin(srch_cache); k != kh_end(srch_cache); k++) {
+                if (!kh_exist(srch_cache, k)) continue;
+                struct srch *cur = kh_key(srch_cache, k);
+
+                if (cur != keep)
+                        srch_free(cur);
+        }
+        srch_set_cs_clear(srch_cache);
+        if (keep) {
+                int absent;
+                k = srch_set_put(srch_cache, keep, &absent);
+                assert(absent);
+                assert(k < kh_end(srch_cache));
+        }
+}
+
+static void srch_init(struct req *req)
 {
-        char *dirv[MY_ARG_MAX];
         int i;
         struct srch *srch = req->srch;
-        int dirc = (int)SPLIT2ARGV(dirv, srch->paths, (size_t)srch->paths_len);
         const unsigned FLAG_PHRASE = Xapian::QueryParser::FLAG_PHRASE;
-        srch->qp_flags = FLAG_PHRASE |
-                        Xapian::QueryParser::FLAG_BOOLEAN |
+        srch->qp_flags = Xapian::QueryParser::FLAG_BOOLEAN |
                         Xapian::QueryParser::FLAG_LOVEHATE |
                         Xapian::QueryParser::FLAG_WILDCARD;
-        if (is_chert(dirv[0]))
-                srch->qp_flags &= ~FLAG_PHRASE;
-        try {
-                srch->db = new Xapian::Database(dirv[0]);
-        } catch (...) {
-                warn("E: Xapian::Database(%s)", dirv[0]);
-                return false;
+        long nfd = req->dirc * SHARD_COST;
+
+        shard_nfd += nfd;
+        if (shard_nfd > my_fd_max) {
+                srch_cache_renew(srch);
+                shard_nfd = nfd;
         }
-        try {
-                for (i = 1; i < dirc; i++) {
-                        if (srch->qp_flags & FLAG_PHRASE && is_chert(dirv[i]))
+        for (int retried = 0; retried < 2; retried++) {
+                srch->qp_flags |= FLAG_PHRASE;
+                i = 0;
+                try {
+                        srch->db = new Xapian::Database(req->dirv[i]);
+                        if (is_chert(req->dirv[0]))
                                 srch->qp_flags &= ~FLAG_PHRASE;
-                        srch->db->add_database(Xapian::Database(dirv[i]));
+                        for (i = 1; i < req->dirc; i++) {
+                                const char *dir = req->dirv[i];
+                                if (srch->qp_flags & FLAG_PHRASE &&
+                                                is_chert(dir))
+                                        srch->qp_flags &= ~FLAG_PHRASE;
+                                srch->db->add_database(Xapian::Database(dir));
+                        }
+                        break;
+                } catch (const Xapian::Error & e) {
+                        warnx("E: Xapian::Error: %s (%s)",
+                                e.get_description().c_str(), req->dirv[i]);
+                } catch (...) { // does this happen?
+                        warn("E: add_database(%s)", req->dirv[i]);
+                }
+                if (retried) {
+                        errx(EXIT_FAILURE, "E: can't open %s", req->dirv[i]);
+                } else {
+                        warnx("retrying...");
+                        if (srch->db)
+                                delete srch->db;
+                        srch->db = NULL;
+                        srch_cache_renew(srch);
                 }
-        } catch (...) {
-                warn("E: add_database(%s)", dirv[i]);
-                return false;
-        }
-        try {
-                srch->qp = new Xapian::QueryParser;
-        } catch (...) {
-                perror("E: Xapian::QueryParser");
-                return false;
         }
+        // these will raise and die on ENOMEM or other errors
+        srch->qp = new Xapian::QueryParser;
         srch->qp->set_default_op(Xapian::Query::OP_AND);
         srch->qp->set_database(*srch->db);
-        try {
-                srch->qp->set_stemmer(Xapian::Stem("english"));
-        } catch (...) {
-                perror("E: Xapian::Stem");
-                return false;
-        }
+        srch->qp->set_stemmer(Xapian::Stem("english"));
         srch->qp->set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
         srch->qp->SET_MAX_EXPANSION(100);
 
@@ -584,7 +639,6 @@ static bool srch_init(struct req *req)
                 qp_init_code_search(srch->qp); // CodeSearch.pm
         else
                 qp_init_mail_search(srch->qp); // Search.pm
-        return true;
 }
 
 // setup query parser for altid and arbitrary headers
@@ -612,14 +666,6 @@ static void srch_init_extra(struct req *req)
         req->srch->qp_extra_done = true;
 }
 
-static void free_srch(void *p) // tdestroy
-{
-        struct srch *srch = (struct srch *)p;
-        delete srch->qp;
-        delete srch->db;
-        free(srch);
-}
-
 static void dispatch(struct req *req)
 {
         int c;
@@ -630,7 +676,6 @@ static void dispatch(struct req *req)
         } kbuf;
         char *end;
         FILE *kfp;
-        struct srch **s;
         req->threadid = ULLONG_MAX;
         for (c = 0; c < (int)MY_ARRAY_SIZE(cmds); c++) {
                 if (cmds[c].fn_len == size &&
@@ -644,7 +689,7 @@ static void dispatch(struct req *req)
         kfp = open_memstream(&kbuf.ptr, &size);
         if (!kfp) err(EXIT_FAILURE, "open_memstream(kbuf)");
         // write padding, first (contents don't matter)
-        fwrite(&req->argv[0], offsetof(struct srch, paths), 1, kfp);
+        fwrite(&req->argv[0], offsetof(struct srch, ckey), 1, kfp);
 
         // global getopt variables:
         optopt = 0;
@@ -656,7 +701,11 @@ static void dispatch(struct req *req)
                 switch (c) {
                 case 'a': req->asc = true; break;
                 case 'c': req->code_search = true; break;
-                case 'd': fwrite(optarg, strlen(optarg) + 1, 1, kfp); break;
+                case 'd':
+                        req->dirv[req->dirc++] = optarg;
+                        if (MY_ARG_MAX == req->dirc) ABORT("too many -d");
+                        fprintf(kfp, "-d%c%s%c", 0, optarg, 0);
+                        break;
                 case 'g': req->Pgit_dir = optarg - 1; break; // pad "P" prefix
                 case 'k':
                         req->sort_col = strtol(optarg, &end, 10);
@@ -696,6 +745,7 @@ static void dispatch(struct req *req)
                 case 'Q':
                         req->qpfxv[req->qpfxc++] = optarg;
                         if (MY_ARG_MAX == req->qpfxc) ABORT("too many -Q");
+                        fprintf(kfp, "-Q%c%s%c", 0, optarg, 0);
                         break;
                 default: ABORT("bad switch `-%c'", c);
                 }
@@ -704,22 +754,20 @@ static void dispatch(struct req *req)
         kbuf.srch->db = NULL;
         kbuf.srch->qp = NULL;
         kbuf.srch->qp_extra_done = false;
-        kbuf.srch->paths_len = size - offsetof(struct srch, paths);
-        if (kbuf.srch->paths_len <= 0)
-                ABORT("no -d args");
-        s = (struct srch **)tsearch(kbuf.srch, &srch_tree, srch_cmp);
-        if (!s) err(EXIT_FAILURE, "tsearch"); // likely ENOMEM
-        req->srch = *s;
-        if (req->srch != kbuf.srch) { // reuse existing
-                free_srch(kbuf.srch);
+        kbuf.srch->ckey_len = size - offsetof(struct srch, ckey);
+        if (kbuf.srch->ckey_len <= 0 || !req->dirc)
+                ABORT("no -d args (or too many)");
+
+        int absent;
+        khint_t ki = srch_set_put(srch_cache, kbuf.srch, &absent);
+        assert(ki < kh_end(srch_cache));
+        req->srch = kh_key(srch_cache, ki);
+        if (absent) {
+                srch_init(req);
+        } else {
+                assert(req->srch != kbuf.srch);
+                srch_free(kbuf.srch);
                 req->srch->db->reopen();
-        } else if (!srch_init(req)) {
-                assert(kbuf.srch == *((struct srch **)tfind(
-                                        kbuf.srch, &srch_tree, srch_cmp)));
-                void *del = tdelete(kbuf.srch, &srch_tree, srch_cmp);
-                assert(del);
-                free_srch(kbuf.srch);
-                goto cmd_err; // srch_init already warned
         }
         if (req->qpfxc && !req->srch->qp_extra_done)
                 srch_init_extra(req);
@@ -736,8 +784,6 @@ static void dispatch(struct req *req)
         }
         if (req->timeout_sec)
                 alarm(0);
-cmd_err:
-        return; // just be silent on errors, for now
 }
 
 static void cleanup_pids(void)
@@ -877,10 +923,11 @@ static void start_workers(void)
 static void cleanup_all(void)
 {
         cleanup_pids();
-#ifdef __GLIBC__
-        tdestroy(srch_tree, free_srch);
-        srch_tree = NULL;
-#endif
+        if (!srch_cache)
+                return;
+        srch_cache_renew(NULL);
+        srch_set_destroy(srch_cache);
+        srch_cache = NULL;
 }
 
 static void parent_reopen_logs(void)
@@ -1014,14 +1061,23 @@ int main(int argc, char *argv[])
         socklen_t slen = (socklen_t)sizeof(c);
         stdout_path = getenv("STDOUT_PATH");
         stderr_path = getenv("STDERR_PATH");
+        struct rlimit rl;
 
         if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &c, &slen))
                 err(EXIT_FAILURE, "getsockopt");
         if (c != SOCK_SEQPACKET)
                 errx(EXIT_FAILURE, "stdin is not SOCK_SEQPACKET");
 
+        if (getrlimit(RLIMIT_NOFILE, &rl))
+                err(EXIT_FAILURE, "getrlimit");
+        my_fd_max = rl.rlim_cur;
+        if (my_fd_max < 72)
+                warnx("W: RLIMIT_NOFILE=%ld too low\n", my_fd_max);
+        my_fd_max -= 64;
+
         mail_nrp_init();
         code_nrp_init();
+        srch_cache = srch_set_init();
         atexit(cleanup_all);
 
         if (!STDERR_ASSIGNABLE) {
@@ -1082,8 +1138,7 @@ int main(int argc, char *argv[])
         CHECK(int, 0, sigdelset(&workerset, SIGTERM));
         CHECK(int, 0, sigdelset(&workerset, SIGCHLD));
         nworker_hwm = nworker;
-        worker_pids = (pid_t *)calloc(nworker, sizeof(pid_t));
-        if (!worker_pids) err(EXIT_FAILURE, "calloc");
+        worker_pids = (pid_t *)xcalloc(nworker, sizeof(pid_t));
 
         if (pipe(pipefds)) err(EXIT_FAILURE, "pipe");
         int fl = fcntl(pipefds[1], F_GETFL);
diff --git a/lib/PublicInbox/xh_cidx.h b/lib/PublicInbox/xh_cidx.h
index 311ca05f..8cc6a845 100644
--- a/lib/PublicInbox/xh_cidx.h
+++ b/lib/PublicInbox/xh_cidx.h
@@ -3,11 +3,38 @@
 // This file is only intended to be included by xap_helper.h
 // it implements pieces used by CodeSearchIdx.pm
 
+// TODO: consider making PublicInbox::CodeSearchIdx emit binary
+// (20 or 32-bit) OIDs instead of ASCII hex.  It would require
+// more code in both Perl and C++, though...
+
+// assumes trusted data from same host
+static inline unsigned int hex2uint(char c)
+{
+        switch (c) {
+        case '0' ... '9': return c - '0';
+        case 'a' ... 'f': return c - 'a' + 10;
+        default: return 0xff; // oh well...
+        }
+}
+
+// assumes trusted data from same host
+static kh_inline khint_t sha_hex_hash(const char *hex)
+{
+        khint_t ret = 0;
+
+        for (size_t shift = 32; shift; )
+                ret |= hex2uint(*hex++) << (shift -= 4);
+
+        return ret;
+}
+
+KHASHL_CMAP_INIT(KH_LOCAL, root2id_map, root2id,
+                const char *, const char *,
+                sha_hex_hash, kh_eq_str)
+
 static void term_length_extract(struct req *req)
 {
-        req->lenv = (size_t *)calloc(req->pfxc, sizeof(size_t));
-        if (!req->lenv)
-                EABORT("lenv = calloc(%d %zu)", req->pfxc, sizeof(size_t));
+        req->lenv = (size_t *)xcalloc(req->pfxc, sizeof(size_t));
         for (int i = 0; i < req->pfxc; i++) {
                 char *pfx = req->pfxv[i];
                 // extract trailing digits as length:
@@ -101,6 +128,7 @@ struct dump_roots_tmp {
         void *mm_ptr;
         char **entries;
         struct fbuf wbuf;
+        root2id_map *root2id;
         int root2off_fd;
 };
 
@@ -110,7 +138,8 @@ static void dump_roots_ensure(void *ptr)
         struct dump_roots_tmp *drt = (struct dump_roots_tmp *)ptr;
         if (drt->root2off_fd >= 0)
                 xclose(drt->root2off_fd);
-        hdestroy(); // idempotent
+        if (drt->root2id)
+                root2id_cm_destroy(drt->root2id);
         size_t size = off2size(drt->sb.st_size);
         if (drt->mm_ptr && munmap(drt->mm_ptr, size))
                 EABORT("BUG: munmap(%p, %zu)", drt->mm_ptr, size);
@@ -118,23 +147,21 @@ static void dump_roots_ensure(void *ptr)
         fbuf_ensure(&drt->wbuf);
 }
 
-static bool root2offs_str(struct fbuf *root_offs, Xapian::Document *doc)
+static bool root2offs_str(struct dump_roots_tmp *drt,
+                        struct fbuf *root_offs, Xapian::Document *doc)
 {
         Xapian::TermIterator cur = doc->termlist_begin();
         Xapian::TermIterator end = doc->termlist_end();
-        ENTRY e, *ep;
         fbuf_init(root_offs);
         for (cur.skip_to("G"); cur != end; cur++) {
                 std::string tn = *cur;
                 if (!starts_with(&tn, "G", 1)) break;
-                union { const char *in; char *out; } u;
-                u.in = tn.c_str() + 1;
-                e.key = u.out;
-                ep = hsearch(e, FIND);
-                if (!ep) ABORT("hsearch miss `%s'", e.key);
-                // ep->data is a NUL-terminated string matching /[0-9]+/
+                khint_t i = root2id_get(drt->root2id, tn.c_str() + 1);
+                if (i >= kh_end(drt->root2id))
+                        ABORT("kh get miss `%s'", tn.c_str() + 1);
                 fputc(' ', root_offs->fp);
-                fputs((const char *)ep->data, root_offs->fp);
+                // kh_val(...) is a NUL-terminated string matching /[0-9]+/
+                fputs(kh_val(drt->root2id, i), root_offs->fp);
         }
         fputc('\n', root_offs->fp);
         ERR_CLOSE(root_offs->fp, EXIT_FAILURE); // ENOMEM
@@ -198,7 +225,7 @@ static enum exc_iter dump_roots_iter(struct req *req,
         CLEANUP_FBUF struct fbuf root_offs = {}; // " $ID0 $ID1 $IDx..\n"
         try {
                 Xapian::Document doc = i->get_document();
-                if (!root2offs_str(&root_offs, &doc))
+                if (!root2offs_str(drt, &root_offs, &doc))
                         return ITER_ABORT; // bad request, abort
                 for (int p = 0; p < req->pfxc; p++)
                         dump_roots_term(req, p, drt, &root_offs, &doc);
@@ -226,8 +253,7 @@ static bool cmd_dump_roots(struct req *req)
         if (fstat(drt.root2off_fd, &drt.sb)) // ENOMEM?
                 err(EXIT_FAILURE, "fstat(%s)", root2off_file);
         // each entry is at least 43 bytes ({OIDHEX}\0{INT}\0),
-        // so /32 overestimates the number of expected entries by
-        // ~%25 (as recommended by Linux hcreate(3) manpage)
+        // so /32 overestimates the number of expected entries
         size_t size = off2size(drt.sb.st_size);
         size_t est = (size / 32) + 1; //+1 for "\0" termination
         drt.mm_ptr = mmap(NULL, size, PROT_READ,
@@ -236,20 +262,19 @@ static bool cmd_dump_roots(struct req *req)
                 err(EXIT_FAILURE, "mmap(%zu, %s)", size, root2off_file);
         size_t asize = est * 2;
         if (asize < est) ABORT("too many entries: %zu", est);
-        drt.entries = (char **)calloc(asize, sizeof(char *));
-        if (!drt.entries)
-                err(EXIT_FAILURE, "calloc(%zu * 2, %zu)", est, sizeof(char *));
+        drt.entries = (char **)xcalloc(asize, sizeof(char *));
         size_t tot = split2argv(drt.entries, (char *)drt.mm_ptr, size, asize);
         if (tot <= 0) return false; // split2argv already warned on error
-        if (!hcreate(est))
-                err(EXIT_FAILURE, "hcreate(%zu)", est);
+        drt.root2id = root2id_init();
+        root2id_cm_resize(drt.root2id, est);
         for (size_t i = 0; i < tot; ) {
-                ENTRY e;
-                e.key = hsearch_enter_key(drt.entries[i++]); // dies on ENOMEM
-                e.data = drt.entries[i++];
-                if (!hsearch(e, ENTER))
-                        err(EXIT_FAILURE, "hsearch(%s => %s, ENTER)", e.key,
-                                        (const char *)e.data);
+                int absent;
+                const char *key = drt.entries[i++];
+                khint_t k = root2id_put(drt.root2id, key, &absent);
+                if (!absent)
+                        err(EXIT_FAILURE, "put(%s => %s, ENTER)",
+                                key, drt.entries[i]);
+                kh_val(drt.root2id, k) = drt.entries[i++];
         }
         req->asc = true;
         req->sort_col = -1;