There are still some places where on_destroy isn't suitable. This gets rid of getpid() calls in those cases to reduce syscall costs and additional runtime overhead. --- lib/PublicInbox/DSKQXS.pm | 7 ++++--- lib/PublicInbox/Daemon.pm | 26 +++++++++----------------- lib/PublicInbox/Git.pm | 8 ++++---- lib/PublicInbox/IO.pm | 12 +++++++----- lib/PublicInbox/LeiALE.pm | 7 ++++--- t/spawn.t | 3 ++- 6 files changed, 30 insertions(+), 33 deletions(-) diff --git a/lib/PublicInbox/DSKQXS.pm b/lib/PublicInbox/DSKQXS.pm index f84c196a..dc6621e4 100644 --- a/lib/PublicInbox/DSKQXS.pm +++ b/lib/PublicInbox/DSKQXS.pm @@ -15,6 +15,7 @@ use v5.12; use Symbol qw(gensym); use IO::KQueue; use Errno qw(EAGAIN); +use PublicInbox::OnDestroy; use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLLET); sub EV_DISPATCH () { 0x0080 } @@ -37,7 +38,8 @@ sub kq_flag ($$) { sub new { my ($class) = @_; - bless { kq => IO::KQueue->new, owner_pid => $$ }, $class; + my $fgen = $PublicInbox::OnDestroy::fork_gen; + bless { kq => IO::KQueue->new, fgen => $fgen }, $class; } # returns a new instance which behaves like signalfd on Linux. 
@@ -137,9 +139,8 @@ sub ep_wait { sub DESTROY { my ($self) = @_; my $kq = delete $self->{kq} or return; - if (delete($self->{owner_pid}) == $$) { + delete($self->{fgen}) == $PublicInbox::OnDestroy::fork_gen and POSIX::close($$kq); - } } 1; diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index 1cc0c9e6..ec76d6b8 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -352,8 +352,7 @@ EOF return unless defined $pid_file; write_pid($pid_file); - # for ->DESTROY: - bless { pid => $$, pid_file => \$pid_file }, __PACKAGE__; + on_destroy \&unlink_pid_file_safe_ish, \$pid_file; } sub has_busy_clients { # post_loop_do CB @@ -476,7 +475,7 @@ sub upgrade { # $_[0] = signal name or number (unused) warn "BUG: .oldbin suffix exists: $pid_file\n"; return; } - unlink_pid_file_safe_ish($$, $pid_file); + unlink_pid_file_safe_ish(\$pid_file); $pid_file .= '.oldbin'; write_pid($pid_file); } @@ -509,23 +508,20 @@ sub upgrade_aborted { my $file = $pid_file; $file =~ s/\.oldbin\z// or die "BUG: no '.oldbin' suffix in $file"; - unlink_pid_file_safe_ish($$, $pid_file); + unlink_pid_file_safe_ish(\$pid_file); $pid_file = $file; eval { write_pid($pid_file) }; warn $@, "\n" if $@; } -sub unlink_pid_file_safe_ish ($$) { - my ($unlink_pid, $file) = @_; - return unless defined $unlink_pid && $unlink_pid == $$; +sub unlink_pid_file_safe_ish ($) { + my ($fref) = @_; - open my $fh, '<', $file or return; + open my $fh, '<', $$fref or return; local $/ = "\n"; defined(my $read_pid = <$fh>) or return; chomp $read_pid; - if ($read_pid == $unlink_pid) { - Net::Server::Daemonize::unlink_pid_file($file); - } + Net::Server::Daemonize::unlink_pid_file($$fref) if $read_pid == $$; } sub master_quit ($) { @@ -696,7 +692,7 @@ sub run { $nworker = 1; local (%XNETD, %POST_ACCEPT); daemon_prepare($default_listen); - my $for_destroy = daemonize(); + my $unlink_on_leave = daemonize(); # localize GCF2C for tests: local $PublicInbox::GitAsyncCat::GCF2C; @@ -706,7 +702,7 @@ sub run { 
local %POST_ACCEPT; daemon_loop(); - # ->DESTROY runs when $for_destroy goes out-of-scope + # $unlink_on_leave runs } sub write_pid ($) { @@ -715,8 +711,4 @@ sub write_pid ($) { do_chown($path); } -sub DESTROY { - unlink_pid_file_safe_ish($_[0]->{pid}, ${$_[0]->{pid_file}}); -} - 1; diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index af12f141..aea389e8 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -210,14 +210,14 @@ sub cat_async_retry ($$) { sub gcf_inflight ($) { my ($self) = @_; # FIXME: the first {sock} check can succeed but Perl can complain - # about calling ->owner_pid on an undefined value. Not sure why or - # how this happens but t/imapd.t can complain about it, sometimes. + # about an undefined value. Not sure why or how this happens but + # t/imapd.t can complain about it, sometimes. if ($self->{sock}) { - if (eval { $self->{sock}->owner_pid == $$ }) { + if (eval { $self->{sock}->can_reap }) { return $self->{inflight}; } elsif ($@) { no warnings 'uninitialized'; - warn "E: $self sock=$self->{sock}: owner_pid failed: ". + warn "E: $self sock=$self->{sock}: can_reap failed: ". 
"$@ (continuing...)"; } delete @$self{qw(sock inflight)}; diff --git a/lib/PublicInbox/IO.pm b/lib/PublicInbox/IO.pm index 5654f3b0..02057600 100644 --- a/lib/PublicInbox/IO.pm +++ b/lib/PublicInbox/IO.pm @@ -10,6 +10,7 @@ our @EXPORT_OK = qw(poll_in read_all try_cat write_file); use Carp qw(croak); use IO::Poll qw(POLLIN); use Errno qw(EINTR EAGAIN); +use PublicInbox::OnDestroy; # don't autodie in top-level for Perl 5.16.3 (and maybe newer versions) # we have our own ->close, so we scope autodie into each sub @@ -23,7 +24,8 @@ sub attach_pid { my ($io, $pid, @cb_arg) = @_; bless $io, __PACKAGE__; # we share $err (and not $self) with awaitpid to avoid a ref cycle - ${*$io}{pi_io_reap} = [ $$, $pid, \(my $err) ]; + ${*$io}{pi_io_reap} = [ $PublicInbox::OnDestroy::fork_gen, + $pid, \(my $err) ]; awaitpid($pid, \&waitcb, \$err, @cb_arg); $io; } @@ -33,9 +35,9 @@ sub attached_pid { ${${*$io}{pi_io_reap} // []}[1]; } -sub owner_pid { +sub can_reap { my ($io) = @_; - ${${*$io}{pi_io_reap} // [-1]}[0]; + ${${*$io}{pi_io_reap} // [-1]}[0] == $PublicInbox::OnDestroy::fork_gen; } # caller cares about error result if they call close explicitly @@ -44,7 +46,7 @@ sub close { my ($io) = @_; my $ret = $io->SUPER::close; my $reap = delete ${*$io}{pi_io_reap}; - return $ret unless $reap && $reap->[0] == $$; + return $ret if ($reap->[0] // -1) != $PublicInbox::OnDestroy::fork_gen; if (defined ${$reap->[2]}) { # reap_pids already reaped asynchronously $? 
= ${$reap->[2]}; } else { # wait synchronously @@ -56,7 +58,7 @@ sub close { sub DESTROY { my ($io) = @_; my $reap = delete ${*$io}{pi_io_reap}; - if ($reap && $reap->[0] == $$) { + if (($reap->[0] // -1) == $PublicInbox::OnDestroy::fork_gen) { $io->SUPER::close; awaitpid($reap->[1]); } diff --git a/lib/PublicInbox/LeiALE.pm b/lib/PublicInbox/LeiALE.pm index 528de22c..ce03f5b4 100644 --- a/lib/PublicInbox/LeiALE.pm +++ b/lib/PublicInbox/LeiALE.pm @@ -11,6 +11,7 @@ use parent qw(PublicInbox::LeiSearch PublicInbox::Lock); use PublicInbox::Git; use autodie qw(close open rename seek truncate); use PublicInbox::Import; +use PublicInbox::OnDestroy; use PublicInbox::LeiXSearch; use Fcntl qw(SEEK_SET); @@ -41,11 +42,11 @@ sub over {} # undef for xoids_for sub overs_all { # for xoids_for (called only in lei workers?) my ($self) = @_; - my $pid = $$; - if (($self->{owner_pid} // $pid) != $pid) { + my $fgen = $PublicInbox::OnDestroy::fork_gen ; + if (($self->{fgen} // $fgen) != $fgen) { delete($_->{over}) for @{$self->{ibxish}}; } - $self->{owner_pid} = $pid; + $self->{fgen} = $fgen; grep(defined, map { $_->over } @{$self->{ibxish}}); } diff --git a/t/spawn.t b/t/spawn.t index 5b17ed38..45517852 100644 --- a/t/spawn.t +++ b/t/spawn.t @@ -6,6 +6,7 @@ use Test::More; use PublicInbox::Spawn qw(which spawn popen_rd run_qx); require PublicInbox::Sigfd; require PublicInbox::DS; +use PublicInbox::OnDestroy; my $rlimit_map = PublicInbox::Spawn->can('rlimit_map'); { my $true = which('true'); @@ -171,7 +172,7 @@ EOF my @arg; my $fh = popen_rd(['cat'], undef, { 0 => $r }, sub { @arg = @_; warn "x=$$\n" }, 'hi'); - my $pid = fork // BAIL_OUT $!; + my $pid = PublicInbox::OnDestroy::fork_tmp; local $SIG{__WARN__} = sub { _exit(1) }; if ($pid == 0) { local $SIG{__DIE__} = sub { _exit(2) };
getpid() isn't cached by glibc nowadays and system calls are more expensive due to CPU vulnerability mitigations. To ensure we switch to the new semantics properly, introduce a new `on_destroy' function to simplify callers. Furthermore, OnDestroy correctness is often tied to the process which creates it, so make the new API default to being guarded against running in subprocesses. For cases which require running in all children, PublicInbox::OnDestroy::all is provided. --- lib/PublicInbox/CodeSearchIdx.pm | 29 +++++++++++++---------------- lib/PublicInbox/DS.pm | 9 +++++---- lib/PublicInbox/Daemon.pm | 12 ++++++------ lib/PublicInbox/IPC.pm | 12 +++++++----- lib/PublicInbox/LEI.pm | 8 ++++---- lib/PublicInbox/LeiMirror.pm | 25 +++++++++++-------------- lib/PublicInbox/LeiP2q.pm | 2 +- lib/PublicInbox/LeiStore.pm | 3 ++- lib/PublicInbox/LeiTag.pm | 8 ++++---- lib/PublicInbox/Lock.pm | 4 ++-- lib/PublicInbox/MHreader.pm | 4 ++-- lib/PublicInbox/MboxLock.pm | 6 +++--- lib/PublicInbox/OnDestroy.pm | 27 +++++++++++++++++---------- lib/PublicInbox/SearchIdxShard.pm | 2 +- lib/PublicInbox/SpawnPP.pm | 5 +++-- lib/PublicInbox/TestCommon.pm | 4 ++-- lib/PublicInbox/Umask.pm | 2 +- lib/PublicInbox/ViewVCS.pm | 5 +++-- lib/PublicInbox/Watch.pm | 4 ++-- lib/PublicInbox/WwwCoderepo.pm | 2 +- lib/PublicInbox/XapClient.pm | 2 +- lib/PublicInbox/XapHelper.pm | 2 +- lib/PublicInbox/Xapcmd.pm | 2 +- script/public-inbox-init | 2 +- t/lei-sigpipe.t | 4 +--- t/mbox_lock.t | 7 ++++--- t/mh_reader.t | 1 - t/on_destroy.t | 25 ++++++++++++++++--------- xt/net_writer-imap.t | 4 ++-- 29 files changed, 117 insertions(+), 105 deletions(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 570ff64f..6d777bf6 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -368,7 +368,7 @@ sub repo_stored { $did > 0 or die "BUG: $repo_ctx->{repo}->{git_dir}: docid=$did"; my ($c, $p) = PublicInbox::PktOp->pair; $c->{ops}->{shard_done} = [ 
$self, $repo_ctx, - PublicInbox::OnDestroy->new(\&next_repos, $repo_ctx, $drs)]; + on_destroy(\&next_repos, $repo_ctx, $drs)]; # shard_done fires when all shards are committed my @active = keys %{$repo_ctx->{active}}; $IDX_SHARDS[$_]->wq_io_do('shard_commit', [ $p->{op_p} ]) for @active; @@ -425,7 +425,7 @@ sub fp_start ($$) { open my $refs, '+>', undef; $git->{-repo}->{refs} = $refs; my ($c, $p) = PublicInbox::PktOp->pair; - my $next_on_err = PublicInbox::OnDestroy->new(\&index_next, $self); + my $next_on_err = on_destroy \&index_next, $self; $c->{ops}->{fp_done} = [ $self, $git, $next_on_err ]; $IDX_SHARDS[++$ANY_SHARD % scalar(@IDX_SHARDS)]->wq_io_do('fp_async', [ $p->{op_p}, $refs ], $git->{git_dir}) @@ -664,8 +664,7 @@ sub index_repo { my $repo_ctx = $REPO_CTX = { self => $self, repo => $repo }; delete $git->{-cidx_gits_fini}; # may fire gits_fini my $drs = delete $git->{-cidx_dump_roots_start}; - my $index_done = PublicInbox::OnDestroy->new(\&index_done, - $repo_ctx, $drs); + my $index_done = on_destroy \&index_done, $repo_ctx, $drs; my ($c, $p) = PublicInbox::PktOp->pair; $c->{ops}->{shard_done} = [ $self, $repo_ctx, $index_done ]; for my $n (0..$#shard_in) { @@ -690,7 +689,7 @@ sub ct_fini { # run_git cb sub prep_repo ($$) { my ($self, $git) = @_; return if $DO_QUIT; - my $index_repo = PublicInbox::OnDestroy->new(\&index_repo, $self, $git); + my $index_repo = on_destroy \&index_repo, $self, $git; my $refs = $git->{-repo}->{refs} // die 'BUG: no {-repo}->{refs}'; sysseek($refs, 0, SEEK_SET); open my $roots_fh, '+>', undef; @@ -787,7 +786,7 @@ sub scan_git_dirs ($) { my ($self) = @_; @$SCANQ = () unless $self->{-opt}->{scan}; $GITS_NR = @$SCANQ or return; - my $gits_fini = PublicInbox::OnDestroy->new(\&gits_fini); + my $gits_fini = on_destroy \&gits_fini; $_->{-cidx_gits_fini} = $gits_fini for @$SCANQ; if (my $drs = $TODO{dump_roots_start}) { $_->{-cidx_dump_roots_start} = $drs for @$SCANQ; @@ -859,7 +858,7 @@ sub prep_umask ($) { umask == $um or 
progress($self, 'using umask from ', $self->{cidx_dir}, ': ', sprintf('0%03o', $um)); - PublicInbox::OnDestroy->new(\&CORE::umask, umask($um)); + on_destroy \&CORE::umask, umask($um); } else { $self->{umask} = umask; # for SearchIdx->with_umask undef; @@ -1083,12 +1082,12 @@ EOM ($JOIN_DT[1]) = ($QRY_STR =~ /\.\.([0-9]{14})\z/); # YYYYmmddHHMMSS ($JOIN_DT[0]) = ($QRY_STR =~ /\Adt:([0-9]{14})/); # YYYYmmddHHMMSS $JOIN_DT[0] //= '19700101'.'000000'; # git uses unsigned times - $TODO{do_join} = PublicInbox::OnDestroy->new(\&do_join, $self); + $TODO{do_join} = on_destroy \&do_join, $self; $TODO{joining} = 1; # keep shards_active() happy - $TODO{dump_ibx_start} = PublicInbox::OnDestroy->new(\&dump_ibx_start, - $self, $TODO{do_join}); - $TODO{dump_roots_start} = PublicInbox::OnDestroy->new( - \&dump_roots_start, $self, $TODO{do_join}); + $TODO{dump_ibx_start} = on_destroy \&dump_ibx_start, + $self, $TODO{do_join}; + $TODO{dump_roots_start} = on_destroy \&dump_roots_start, + $self, $TODO{do_join}; progress($self, "will join in $QRY_STR date range..."); my $id = -1; @IBXQ = map { ++$id } @IBX; @@ -1110,8 +1109,7 @@ sub init_prune ($) { require_progs('prune', 'xapian-delve' => \@delve, sed => \@sed, comm => \@COMM, awk => \@AWK); for (0..$#IDX_SHARDS) { push @delve, "$self->{xpfx}/$_" } - my $run_prune = PublicInbox::OnDestroy->new(\&run_prune, $self, - $TODO{dump_roots_start}); + my $run_prune = on_destroy \&run_prune, $self, $TODO{dump_roots_start}; my ($sort_opt, $sed_opt, $delve_opt); pipe(local $sed_opt->{0}, local $delve_opt->{1}); pipe(local $sort_opt->{0}, local $sed_opt->{1}); @@ -1279,8 +1277,7 @@ sub cidx_run { # main entry point my $restore_umask = prep_umask($self); local $SIGSET = PublicInbox::DS::block_signals( POSIX::SIGTSTP, POSIX::SIGCONT); - my $restore = PublicInbox::OnDestroy->new($$, - \&PublicInbox::DS::sig_setmask, $SIGSET); + my $restore = on_destroy \&PublicInbox::DS::sig_setmask, $SIGSET; local $PRUNE_DONE = []; local $IDXQ = []; local $SCANQ = 
[]; diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm index 8bc8cfb7..a6fec954 100644 --- a/lib/PublicInbox/DS.pm +++ b/lib/PublicInbox/DS.pm @@ -32,9 +32,9 @@ use PublicInbox::Syscall qw(%SIGNUM EPOLLIN EPOLLOUT EPOLLONESHOT EPOLLEXCLUSIVE); use PublicInbox::Tmpfile; use PublicInbox::Select; +use PublicInbox::OnDestroy; use Errno qw(EAGAIN EINVAL ECHILD); use Carp qw(carp croak); -use autodie qw(fork); our @EXPORT_OK = qw(now msg_more awaitpid add_timer add_uniq_timer); my $nextq; # queue for next_tick @@ -679,12 +679,13 @@ sub awaitpid { } } -sub do_fork () { +# for persistent child process +sub fork_persist () { my $seed = rand(0xffffffff); - my $pid = fork; + my $pid = PublicInbox::OnDestroy::fork_tmp; if ($pid == 0) { srand($seed); - eval { Net::SSLeay::randomize() }; + eval { Net::SSLeay::randomize() }; # may not be loaded Reset(); } $pid; diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index e578f2e8..1cc0c9e6 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -21,6 +21,7 @@ use PublicInbox::Git; use PublicInbox::GitAsyncCat; use PublicInbox::Eml; use PublicInbox::Config; +use PublicInbox::OnDestroy; our $SO_ACCEPTFILTER = 0x1000; my @CMD; my ($set_user, $oldset); @@ -338,15 +339,14 @@ EOF }; if ($daemonize) { - my $pid = fork // die "fork: $!"; + my $pid = PublicInbox::OnDestroy::fork_tmp; exit if $pid; - open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!\n"; open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n"; open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n"; POSIX::setsid(); - $pid = fork // die "fork: $!"; + $pid = PublicInbox::OnDestroy::fork_tmp; exit if $pid; } return unless defined $pid_file; @@ -480,9 +480,9 @@ sub upgrade { # $_[0] = signal name or number (unused) $pid_file .= '.oldbin'; write_pid($pid_file); } - my $pid = fork; + my $pid = eval { PublicInbox::OnDestroy::fork_tmp }; if (!defined($pid)) { - warn "fork failed: $!\n"; + warn "fork failed: $! 
$@\n"; } elsif ($pid == 0) { $ENV{LISTEN_FDS} = scalar @listeners; $ENV{LISTEN_PID} = $$; @@ -545,7 +545,7 @@ sub reap_worker { # awaitpid CB sub start_worker ($) { my ($nr) = @_; return unless @listeners; - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { undef %WORKERS; local $PublicInbox::DS::Poller; # allow epoll/kqueue diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm index a5cae6f2..ed6d27fd 100644 --- a/lib/PublicInbox/IPC.pm +++ b/lib/PublicInbox/IPC.pm @@ -10,7 +10,7 @@ package PublicInbox::IPC; use v5.12; use parent qw(Exporter); -use autodie qw(close fork pipe read socketpair sysread); +use autodie qw(close pipe read socketpair sysread); use Carp qw(croak); use PublicInbox::DS qw(awaitpid); use PublicInbox::Spawn; @@ -93,6 +93,8 @@ sub ipc_worker_loop ($$$) { } } +sub exit_exception { exit(!!$@) } + # starts a worker if Sereal or Storable is installed sub ipc_worker_spawn { my ($self, $ident, $oldset, $fields, @cb_args) = @_; @@ -102,7 +104,7 @@ sub ipc_worker_spawn { pipe(my $r_res, my $w_res); my $sigset = $oldset // PublicInbox::DS::block_signals(); $self->ipc_atfork_prepare; - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { delete @$self{qw(-wq_s1 -wq_s2 -wq_workers -wq_ppid)}; $w_req = $r_res = undef; @@ -110,7 +112,7 @@ sub ipc_worker_spawn { $SIG{$_} = 'IGNORE' for (qw(TERM INT QUIT)); local $0 = $ident; # ensure we properly exit even if warn() dies: - my $end = PublicInbox::OnDestroy->new($$, sub { exit(!!$@) }); + my $end = on_destroy \&exit_exception; eval { $fields //= {}; local @$self{keys %$fields} = values(%$fields); @@ -330,7 +332,7 @@ sub _wq_worker_start { my ($self, $oldset, $fields, $one, @cb_args) = @_; my ($bcast1, $bcast2); $one or socketpair($bcast1, $bcast2, AF_UNIX, SOCK_SEQPACKET, 0); - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { undef $bcast1; delete @$self{qw(-wq_s1 
-wq_ppid)}; @@ -340,7 +342,7 @@ sub _wq_worker_start { local $0 = $one ? $self->{-wq_ident} : "$self->{-wq_ident} $self->{-wq_worker_nr}"; # ensure we properly exit even if warn() dies: - my $end = PublicInbox::OnDestroy->new($$, sub { exit(!!$@) }); + my $end = on_destroy \&exit_exception; eval { $fields //= {}; local @$self{keys %$fields} = values(%$fields); diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 81f940fe..7c31ab43 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -9,7 +9,7 @@ package PublicInbox::LEI; use v5.12; use parent qw(PublicInbox::DS PublicInbox::LeiExternal PublicInbox::LeiQuery); -use autodie qw(bind chdir fork open pipe socket socketpair syswrite unlink); +use autodie qw(bind chdir open pipe socket socketpair syswrite unlink); use Getopt::Long (); use Socket qw(AF_UNIX SOCK_SEQPACKET pack_sockaddr_un); use Errno qw(EPIPE EAGAIN ECONNREFUSED ENOENT ECONNRESET); @@ -24,6 +24,7 @@ use PublicInbox::Lock; use PublicInbox::Eml; use PublicInbox::Import; use PublicInbox::ContentHash qw(git_sha); +use PublicInbox::OnDestroy; use PublicInbox::IPC; use Time::HiRes qw(stat); # ctime comparisons for config cache use File::Path (); @@ -631,9 +632,8 @@ sub _delete_pkt_op { # OnDestroy callback to prevent leaks on die sub pkt_op_pair { my ($self) = @_; - require PublicInbox::OnDestroy; require PublicInbox::PktOp; - my $end = PublicInbox::OnDestroy->new($$, \&_delete_pkt_op, $self); + my $end = on_destroy \&_delete_pkt_op, $self; @$self{qw(pkt_op_c pkt_op_p)} = PublicInbox::PktOp->pair; $end; } @@ -1357,7 +1357,7 @@ sub lazy_start { STDIN->autoflush(1); dump_and_clear_log(); POSIX::setsid() > 0 or die "setsid: $!"; - my $pid = fork; + my $pid = PublicInbox::OnDestroy::fork_tmp; return if $pid; $0 = "lei-daemon $path"; local (%PATH2CFG, $MDIR2CFGPATH); diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 4e3e1d1c..08e61e4b 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm 
@@ -293,7 +293,7 @@ sub start_update_ref { pipe(my $r, my $w); my $cmd = [ 'git', "--git-dir=$fgrp->{cur_dst}", qw(update-ref --stdin -z) ]; - my $pack = PublicInbox::OnDestroy->new($$, \&satellite_done, $fgrp); + my $pack = on_destroy \&satellite_done, $fgrp; start_cmd($fgrp, $cmd, { 0 => $r, 2 => $fgrp->{lei}->{2} }, $pack); close $r; $fgrp->{dry_run} ? undef : $w; @@ -373,10 +373,7 @@ sub fgrpv_done { for my $fgrp (@$fgrpv) { my $rn = $fgrp->{-remote}; my %opt = ( 2 => $fgrp->{lei}->{2} ); - - my $update_ref = PublicInbox::OnDestroy->new($$, - \&fgrp_update, $fgrp); - + my $update_ref = on_destroy \&fgrp_update, $fgrp; my $src = [ 'git', "--git-dir=$fgrp->{-osdir}", 'for-each-ref', "--format=refs/%(refname:lstrip=3)%00%(objectname)", "refs/remotes/$rn/" ]; @@ -467,7 +464,7 @@ sub fgrp_fetch_all { } $cmd = [ @git, "--git-dir=$osdir", @fetch, $grp ]; push @$old, @$new; - my $end = PublicInbox::OnDestroy->new($$, \&fgrpv_done, $old); + my $end = on_destroy \&fgrpv_done, $old; start_cmd($self, $cmd, $opt, $end); } } @@ -567,7 +564,7 @@ sub resume_fetch { my $cmd = [ @{$self->{-torsocks}}, @git, fetch_args($self->{lei}, $opt), $rn ]; push @$cmd, '-P' if $self->{lei}->{prune}; # --prune-tags implied - my $run_puh = PublicInbox::OnDestroy->new($$, \&run_puh, $self, $fini); + my $run_puh = on_destroy \&run_puh, $self, $fini; ++$self->{chg}->{nr_chg}; start_cmd($self, $cmd, $opt, $run_puh); } @@ -599,7 +596,7 @@ sub clone_v1 { return; } } - my $fini = PublicInbox::OnDestroy->new($$, \&v1_done, $self); + my $fini = on_destroy \&v1_done, $self; if (my $fgrp = forkgroup_prep($self, $uri)) { $fgrp->{-fini} = $fini; if ($resume) { @@ -621,8 +618,8 @@ sub clone_v1 { } } ++$self->{chg}->{nr_chg}; - start_cmd($self, $cmd, $opt, PublicInbox::OnDestroy->new($$, - \&run_puh, $self, $fini)); + start_cmd($self, $cmd, $opt, + on_destroy(\&run_puh, $self, $fini)); } if (!$self->{-is_epoch} && $lei->{opt}->{'inbox-config'} =~ /\A(?:always|v1)\z/s && @@ -737,7 +734,7 @@ sub atomic_write 
($$$) { sub run_next_puh { my ($self) = @_; my $puh = shift @{$self->{-puh_todo}} // return delete($self->{-fini}); - my $fini = PublicInbox::OnDestroy->new($$, \&run_next_puh, $self); + my $fini = on_destroy \&run_next_puh, $self; my $cmd = [ @$puh, ($self->{cur_dst} // $self->{dst}) ]; my $opt = +{ map { $_ => $self->{lei}->{$_} } (0..2) }; start_cmd($self, $cmd, undef, $opt, $fini); @@ -762,7 +759,7 @@ sub update_ent { my $opt = { 2 => $self->{lei}->{2} }; open($opt->{1}, '+>', undef); $self->{-show_ref_up} = $opt->{1}; - my $done = PublicInbox::OnDestroy->new($$, \&up_fp_done, $self); + my $done = on_destroy \&up_fp_done, $self; start_cmd($self, $cmd, $opt, $done); } $new = $self->{-ent}->{head}; @@ -883,7 +880,7 @@ sub clone_v2_prep ($$;$) { my $want = parse_epochs($lei->{opt}->{epoch}, $v2_epochs); my $task = $m ? bless { %$self }, __PACKAGE__ : $self; my (@skip, $desc); - my $fini = PublicInbox::OnDestroy->new($$, \&v2_done, $task); + my $fini = on_destroy \&v2_done, $task; for my $nr (sort { $a <=> $b } keys %$v2_epochs) { my ($uri, $key) = @{$v2_epochs->{$nr}}; my $src = $uri->as_string; @@ -1018,7 +1015,7 @@ sub clone_all { my ($self, $m) = @_; my $todo = $TODO; $TODO = \'BUG on further use'; - my $end = PublicInbox::OnDestroy->new($$, \&fgrp_fetch_all, $self); + my $end = on_destroy \&fgrp_fetch_all, $self; { my $nodep = delete $todo->{''}; diff --git a/lib/PublicInbox/LeiP2q.pm b/lib/PublicInbox/LeiP2q.pm index 610adb78..68faa016 100644 --- a/lib/PublicInbox/LeiP2q.pm +++ b/lib/PublicInbox/LeiP2q.pm @@ -189,7 +189,7 @@ sub lei_p2q { # the "lei patch-to-query" entry point sub ipc_atfork_child { my ($self) = @_; PublicInbox::LeiInput::input_only_atfork_child($self); - PublicInbox::OnDestroy->new($$, \&emit_query, $self); + on_destroy \&emit_query, $self; } no warnings 'once'; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index a752174d..2eb09eca 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -28,6 
+28,7 @@ use PublicInbox::Spawn qw(spawn); use PublicInbox::MdirReader; use PublicInbox::LeiToMail; use PublicInbox::Compat qw(uniqstr); +use PublicInbox::OnDestroy; use File::Temp qw(tmpnam); use POSIX (); use IO::Handle (); # ->autoflush @@ -135,7 +136,7 @@ sub eidx_init { my ($self) = @_; my $eidx = $self->{priv_eidx}; my $tl = wantarray && $self->{-err_wr} ? - PublicInbox::OnDestroy->new($$, \&_tail_err, $self) : + on_destroy(\&_tail_err, $self) : undef; $eidx->idx_init({-private => 1}); # acquires lock wantarray ? ($eidx, $tl) : $eidx; diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm index 320b0355..da8caeb7 100644 --- a/lib/PublicInbox/LeiTag.pm +++ b/lib/PublicInbox/LeiTag.pm @@ -1,12 +1,12 @@ -# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # handles "lei tag" command package PublicInbox::LeiTag; -use strict; -use v5.10.1; +use v5.12; use parent qw(PublicInbox::IPC PublicInbox::LeiInput); use PublicInbox::InboxWritable qw(eml_from_path); +use PublicInbox::OnDestroy; sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh my ($self, $eml) = @_; @@ -49,7 +49,7 @@ sub ipc_atfork_child { PublicInbox::LeiInput::input_only_atfork_child($self); $self->{lse} = $self->{lei}->{sto}->search; # this goes out-of-scope at worker process exit: - PublicInbox::OnDestroy->new($$, \¬e_unimported, $self); + on_destroy \¬e_unimported, $self; } # Workaround bash word-splitting s to ['kw', ':', 'keyword' ...] 
diff --git a/lib/PublicInbox/Lock.pm b/lib/PublicInbox/Lock.pm index 2a5a0f30..7162d80e 100644 --- a/lib/PublicInbox/Lock.pm +++ b/lib/PublicInbox/Lock.pm @@ -43,7 +43,7 @@ sub lock_release { sub lock_for_scope { my ($self) = @_; lock_acquire($self) or return; # lock_path not set - PublicInbox::OnDestroy->new(\&lock_release, $self); + on_destroy \&lock_release, $self; } sub lock_acquire_fast { @@ -60,7 +60,7 @@ sub lock_release_fast { sub lock_for_scope_fast { my ($self) = @_; lock_acquire_fast($self) or return; # lock_path not set - PublicInbox::OnDestroy->new(\&lock_release_fast, $self); + on_destroy \&lock_release_fast, $self; } 1; diff --git a/lib/PublicInbox/MHreader.pm b/lib/PublicInbox/MHreader.pm index 3e7bbd5c..16e505a2 100644 --- a/lib/PublicInbox/MHreader.pm +++ b/lib/PublicInbox/MHreader.pm @@ -52,7 +52,7 @@ EOM sub mh_each_file { my ($self, $efcb, @arg) = @_; opendir(my $dh, my $dir = $self->{dir}); - my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh}); + my $restore = on_destroy \&chdir, $self->{cwdfh}; chdir($dh); my $sort = $self->{sort}; if (defined $sort && "@$sort" ne 'none') { @@ -96,7 +96,7 @@ sub mh_each_eml { sub mh_read_one { my ($self, $n, $ucb, @arg) = @_; - my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh}); + my $restore = on_destroy \&chdir, $self->{cwdfh}; chdir(my $dir = $self->{dir}); _file2eml($dir, $n, $self, $ucb, @arg); } diff --git a/lib/PublicInbox/MboxLock.pm b/lib/PublicInbox/MboxLock.pm index 9d7d4a32..5e373873 100644 --- a/lib/PublicInbox/MboxLock.pm +++ b/lib/PublicInbox/MboxLock.pm @@ -4,7 +4,7 @@ # Various mbox locking methods package PublicInbox::MboxLock; use v5.12; -use PublicInbox::OnDestroy; +use PublicInbox::OnDestroy (); use Fcntl qw(:flock F_SETLK F_SETLKW F_RDLCK F_WRLCK O_CREAT O_EXCL O_WRONLY SEEK_SET); use Carp qw(croak); @@ -122,10 +122,10 @@ sub acq { sub DESTROY { my ($self) = @_; my $f = $self->{".lock$$"} or return; - my $x; + my $od; if (my $dh = delete 
$self->{dh}) { opendir my $c, '.'; - $x = PublicInbox::OnDestroy->new(\&chdir, $c); + $od = PublicInbox::OnDestroy::all \&chdir, $c; chdir($dh); } CORE::unlink($f) or die "unlink($f): $! (lock stolen?)"; diff --git a/lib/PublicInbox/OnDestroy.pm b/lib/PublicInbox/OnDestroy.pm index d9a6cd24..4301edff 100644 --- a/lib/PublicInbox/OnDestroy.pm +++ b/lib/PublicInbox/OnDestroy.pm @@ -3,22 +3,29 @@ package PublicInbox::OnDestroy; use v5.12; +use parent qw(Exporter); +use autodie qw(fork); +our @EXPORT = qw(on_destroy); +our $fork_gen = 0; -sub new { - shift; # ($class, $cb, @args) - bless [ @_ ], __PACKAGE__; +# either parent or child is expected to exit or exec shortly after this: +sub fork_tmp () { + my $pid = fork; + ++$fork_gen if $pid == 0; + $pid; } +# all children +sub all (@) { bless [ undef, @_ ], __PACKAGE__ } + +# same process +sub on_destroy (@) { bless [ $fork_gen, @_ ], __PACKAGE__ } + sub cancel { @{$_[0]} = () } sub DESTROY { - my ($cb, @args) = @{$_[0]}; - if (!ref($cb) && $cb) { - my $pid = $cb; - return if $pid != $$; - $cb = shift @args; - } - $cb->(@args) if $cb; + my ($fgen, $cb, @args) = @{$_[0]}; + $cb->(@args) if ($cb && ($fgen // $fork_gen) == $fork_gen); } 1; diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 1630eb4a..ea261bda 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -45,7 +45,7 @@ sub ipc_atfork_child { # called automatically before ipc_worker_loop $v2w->{current_info} = "[$self->{shard}]"; # for $SIG{__WARN__} $self->begin_txn_lazy; # caller (ipc_worker_spawn) must capture this: - PublicInbox::OnDestroy->new($$, \&_worker_done, $self); + on_destroy \&_worker_done, $self; } sub index_eml { diff --git a/lib/PublicInbox/SpawnPP.pm b/lib/PublicInbox/SpawnPP.pm index f89d37d4..9ad4d0a1 100644 --- a/lib/PublicInbox/SpawnPP.pm +++ b/lib/PublicInbox/SpawnPP.pm @@ -7,7 +7,8 @@ package PublicInbox::SpawnPP; use v5.12; use POSIX qw(dup2 _exit setpgid :signal_h); 
-use autodie qw(chdir close fork pipe); +use autodie qw(chdir close pipe); +use PublicInbox::OnDestroy; # this is loaded by PublicInbox::Spawn, so we can't use/require it, here # Pure Perl implementation for folks that do not use Inline::C @@ -22,7 +23,7 @@ sub pi_fork_exec ($$$$$$$) { } sigprocmask(SIG_SETMASK, $set, $old) or die "SIG_SETMASK(set): $!"; pipe(my $r, my $w); - my $pid = fork; + my $pid = PublicInbox::OnDestroy::fork_tmp; if ($pid == 0) { close $r; $SIG{__DIE__} = sub { diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 5f159683..a7ec9b5b 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -588,9 +588,9 @@ sub start_script { require PublicInbox::DS; my $oset = PublicInbox::DS::block_signals(); require PublicInbox::OnDestroy; - my $tmp_mask = PublicInbox::OnDestroy->new( + my $tmp_mask = PublicInbox::OnDestroy::all( \&PublicInbox::DS::sig_setmask, $oset); - my $pid = PublicInbox::DS::do_fork(); + my $pid = PublicInbox::DS::fork_persist(); if ($pid == 0) { close($_) for (@{delete($opt->{-CLOFORK}) // []}); # pretend to be systemd (cf. sd_listen_fds(3)) diff --git a/lib/PublicInbox/Umask.pm b/lib/PublicInbox/Umask.pm index 00772ce5..2c859e65 100644 --- a/lib/PublicInbox/Umask.pm +++ b/lib/PublicInbox/Umask.pm @@ -58,7 +58,7 @@ sub _umask_for { sub with_umask { my ($self, $cb, @arg) = @_; my $old = umask($self->{umask} //= umask_prepare($self)); - my $restore = PublicInbox::OnDestroy->new($$, \&CORE::umask, $old); + my $restore = on_destroy \&CORE::umask, $old; $cb ? 
$cb->(@arg) : $restore; } diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index 790b9a2c..f47c2703 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -25,6 +25,7 @@ use PublicInbox::Tmpfile; use PublicInbox::ViewDiff qw(flush_diff uri_escape_path); use PublicInbox::View; use PublicInbox::Eml; +use PublicInbox::OnDestroy; use Text::Wrap qw(wrap); use PublicInbox::Hval qw(ascii_html to_filename prurl utf8_maybe); use POSIX qw(strftime); @@ -388,7 +389,7 @@ sub show_commit ($$) { qw(--encoding=UTF-8 -z --no-notes --no-patch), $oid), undef, { 1 => $ctx->{patch_fh} }); $qsp_h->{qsp_err} = \($ctx->{-qsp_err_h} = ''); - my $cmt_fin = PublicInbox::OnDestroy->new($$, \&cmt_fin, $ctx); + my $cmt_fin = on_destroy \&cmt_fin, $ctx; $ctx->{git} = $git; $ctx->{oid} = $oid; $qsp_h->psgi_qx($ctx->{env}, undef, \&cmt_hdr_prep, $ctx, $cmt_fin); @@ -624,7 +625,7 @@ sub start_solver ($) { my $v = $ctx->{qp}->{$from} // next; $ctx->{hints}->{$to} = $v if $v ne ''; } - $ctx->{-next_solver} = PublicInbox::OnDestroy->new($$, \&next_solver); + $ctx->{-next_solver} = on_destroy \&next_solver; ++$solver_nr; $ctx->{-tmp} = File::Temp->newdir("solver.$ctx->{oid_b}-XXXX", TMPDIR => 1); diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 1ec574ea..eb90d353 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -445,7 +445,7 @@ sub imap_idle_reap { # awaitpid callback sub imap_idle_fork { my ($self, $uri, $intvl) = @_; return if $self->{quit}; - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { watch_atfork_child($self); watch_imap_idle_1($self, $uri, $intvl); @@ -506,7 +506,7 @@ sub poll_fetch_fork { # DS::add_timer callback my @imap = grep { # push() always returns > 0 $_->scheme =~ m!\Aimaps?!i ? 
1 : (push(@nntp, $_) < 0) } @$uris; - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { watch_atfork_child($self); watch_imap_fetch_all($self, \@imap) if @imap; diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm index 61aa7862..a5e2dc4a 100644 --- a/lib/PublicInbox/WwwCoderepo.pm +++ b/lib/PublicInbox/WwwCoderepo.pm @@ -253,7 +253,7 @@ sub summary ($$) { push(@log, $tip) if defined $tip; # limit scope for MockHTTP test (t/solver_git.t) - my $END = PublicInbox::OnDestroy->new($$, \&summary_END, $ctx); + my $END = on_destroy \&summary_END, $ctx; for (['log', \@log], [ 'heads', [@EACH_REF, "--count=$nb", 'refs/heads'] ], [ 'tags', [@EACH_REF, "--count=$nt", 'refs/tags'] ]) { diff --git a/lib/PublicInbox/XapClient.pm b/lib/PublicInbox/XapClient.pm index 4dcbbe5d..98034130 100644 --- a/lib/PublicInbox/XapClient.pm +++ b/lib/PublicInbox/XapClient.pm @@ -11,7 +11,7 @@ use v5.12; use PublicInbox::Spawn qw(spawn); use Socket qw(AF_UNIX SOCK_SEQPACKET); use PublicInbox::IPC; -use autodie qw(fork pipe socketpair); +use autodie qw(pipe socketpair); sub mkreq { my ($self, $ios, @arg) = @_; diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm index ed11a2f8..8c7732f5 100644 --- a/lib/PublicInbox/XapHelper.pm +++ b/lib/PublicInbox/XapHelper.pm @@ -244,7 +244,7 @@ sub reap_worker { # awaitpid CB sub start_worker ($) { my ($nr) = @_; - my $pid = eval { PublicInbox::DS::do_fork } // return(warn($@)); + my $pid = eval { PublicInbox::DS::fork_persist } // return(warn($@)); if ($pid == 0) { undef %WORKERS; $SIG{TTIN} = $SIG{TTOU} = 'IGNORE'; diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 69f0af43..9a148ae4 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -103,7 +103,7 @@ sub commit_changes ($$$$) { sub cb_spawn { my ($cb, $args, $opt) = @_; # $cb = cpdb() or compact() - my $pid = PublicInbox::DS::do_fork; + my $pid = 
PublicInbox::DS::fork_persist; return $pid if $pid > 0; $SIG{__DIE__} = sub { warn @_; _exit(1) }; # don't jump up stack $cb->($args, $opt); diff --git a/script/public-inbox-init b/script/public-inbox-init index 8915cf31..cf6443f7 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -122,7 +122,7 @@ sysopen($lockfh, $lockfile, O_RDWR|O_CREAT|O_EXCL) or do { exit(255); }; require PublicInbox::OnDestroy; -my $auto_unlink = PublicInbox::OnDestroy->new($$, sub { unlink $lockfile }); +my $auto_unlink = PublicInbox::OnDestroy::on_destroy(sub { unlink $lockfile }); my $perm = 0644 & ~umask; my %seen; if (-e $pi_config) { diff --git a/t/lei-sigpipe.t b/t/lei-sigpipe.t index 72bc6c7d..b9fd88a6 100644 --- a/t/lei-sigpipe.t +++ b/t/lei-sigpipe.t @@ -26,9 +26,7 @@ SKIP: { # https://public-inbox.org/meta/20220227080422.gyqowrxomzu6gyin@sourcephile.fr/ my $oldSIGPIPE = $SIG{PIPE}; $SIG{PIPE} = 'DEFAULT'; -my $cleanup = PublicInbox::OnDestroy->new($$, sub { - $SIG{PIPE} = $oldSIGPIPE; -}); +my $cleanup = on_destroy(sub { $SIG{PIPE} = $oldSIGPIPE }); test_lei(sub { my $f = "$ENV{HOME}/big.eml"; diff --git a/t/mbox_lock.t b/t/mbox_lock.t index c2fee0d4..1fc828aa 100644 --- a/t/mbox_lock.t +++ b/t/mbox_lock.t @@ -2,6 +2,7 @@ # Copyright (C) 2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> use strict; use v5.10.1; use PublicInbox::TestCommon; +use autodie qw(chdir); use POSIX qw(_exit); use PublicInbox::DS qw(now); use Errno qw(EAGAIN); @@ -18,11 +19,11 @@ ok(!-f "$f.lock", 'no dotlock with none'); undef $mbl; { opendir my $cur, '.' 
or BAIL_OUT $!; - my $od = PublicInbox::OnDestroy->new(sub { chdir $cur }); - chdir $tmpdir or BAIL_OUT; + my $od = on_destroy \&chdir, $cur; + chdir $tmpdir; my $abs = "$tmpdir/rel.lock"; my $rel = PublicInbox::MboxLock->acq('rel', 1, ['dotlock']); - chdir '/' or BAIL_OUT; + chdir '/'; ok(-f $abs, 'lock with abs path created'); undef $rel; ok(!-f $abs, 'lock gone despite being in the wrong dir'); diff --git a/t/mh_reader.t b/t/mh_reader.t index c81df32e..95a7be4a 100644 --- a/t/mh_reader.t +++ b/t/mh_reader.t @@ -5,7 +5,6 @@ use PublicInbox::TestCommon; require_ok 'PublicInbox::MHreader'; use PublicInbox::IO qw(write_file); use PublicInbox::Lock; -use PublicInbox::OnDestroy; use PublicInbox::Eml; use File::Path qw(remove_tree); use autodie; diff --git a/t/on_destroy.t b/t/on_destroy.t index e7945100..e8fdf35e 100644 --- a/t/on_destroy.t +++ b/t/on_destroy.t @@ -1,37 +1,44 @@ #!perl -w use v5.12; use Test::More; -require_ok 'PublicInbox::OnDestroy'; +use PublicInbox::OnDestroy; +use POSIX qw(_exit); my @x; -my $od = PublicInbox::OnDestroy->new(sub { push @x, 'hi' }); +my $od = on_destroy sub { push @x, 'hi' }; is_deeply(\@x, [], 'not called, yet'); undef $od; is_deeply(\@x, [ 'hi' ], 'no args works'); -$od = PublicInbox::OnDestroy->new(sub { $x[0] = $_[0] }, 'bye'); +$od = on_destroy sub { $x[0] = $_[0] }, 'bye'; is_deeply(\@x, [ 'hi' ], 'nothing changed while alive'); undef $od; is_deeply(\@x, [ 'bye' ], 'arg passed'); -$od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y)); +$od = on_destroy sub { @x = @_ }, qw(x y); undef $od; is_deeply(\@x, [ 'x', 'y' ], '2 args passed'); open my $tmp, '+>>', undef or BAIL_OUT $!; $tmp->autoflush(1); -$od = PublicInbox::OnDestroy->new(1, sub { print $tmp "$$ DESTROY\n" }); -undef $od; +$od = on_destroy sub { print $tmp "$$ DESTROY\n" }; +my $pid = PublicInbox::OnDestroy::fork_tmp; +if ($pid == 0) { undef $od; _exit 0; }; +waitpid($pid, 0); +is $?, 0, 'test process exited'; is(-s $tmp, 0, '$tmp is empty on pid mismatch'); 
-$od = PublicInbox::OnDestroy->new($$, sub { $tmp = $$ }); +$od->cancel; +undef $od; +is(-s $tmp, 0, '$tmp is empty after ->cancel'); +$od = on_destroy sub { $tmp = $$ }; undef $od; is($tmp, $$, '$tmp set to $$ by callback'); -$od = PublicInbox::OnDestroy->new($$, sub { $tmp = 'foo' }); +$od = on_destroy sub { $tmp = 'foo' }; $od->cancel; $od = undef; isnt($tmp, 'foo', '->cancel'); if (my $nr = $ENV{TEST_LEAK_NR}) { for (0..$nr) { - $od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y)); + $od = on_destroy sub { @x = @_ }, qw(x y); } } diff --git a/xt/net_writer-imap.t b/xt/net_writer-imap.t index f7796e8e..176502ba 100644 --- a/xt/net_writer-imap.t +++ b/xt/net_writer-imap.t @@ -82,7 +82,7 @@ my $mics = do { $nwr->imap_common_init; }; my $mic = (values %$mics)[0]; -my $cleanup = PublicInbox::OnDestroy->new($$, sub { +my $cleanup = on_destroy sub { if (defined($folder)) { my $mic = $nwr->mic_get($uri); $mic->delete($folder) or @@ -92,7 +92,7 @@ my $cleanup = PublicInbox::OnDestroy->new($$, sub { local $ENV{HOME} = $tmpdir; system(qw(git credential-cache exit)); } -}); +}; my $imap_append = $nwr->can('imap_append'); my $smsg = bless { kw => [ 'seen' ] }, 'PublicInbox::Smsg'; $imap_append->($mic, $folder, undef, $smsg, eml_load('t/plack-qp.eml'));
PID guards for OnDestroy will be the default in an upcoming change. In the meantime, LeiMirror was the only user and didn't actually need it. --- lib/PublicInbox/LeiMirror.pm | 2 +- lib/PublicInbox/Lock.pm | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 5353ae61..4e3e1d1c 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -856,7 +856,7 @@ sub v2_done { # called via OnDestroy my $dst = $self->{cur_dst} // $self->{dst}; require PublicInbox::Lock; my $lk = PublicInbox::Lock->new("$dst/inbox.lock"); - my $lck = $lk->lock_for_scope($$); + my $lck = $lk->lock_for_scope; _write_inbox_config($self); require PublicInbox::MultiGit; my $mg = PublicInbox::MultiGit->new($dst, 'all.git', 'git'); diff --git a/lib/PublicInbox/Lock.pm b/lib/PublicInbox/Lock.pm index ddaf3312..2a5a0f30 100644 --- a/lib/PublicInbox/Lock.pm +++ b/lib/PublicInbox/Lock.pm @@ -41,9 +41,9 @@ sub lock_release { # caller must use return value sub lock_for_scope { - my ($self, @single_pid) = @_; + my ($self) = @_; lock_acquire($self) or return; # lock_path not set - PublicInbox::OnDestroy->new(@single_pid, \&lock_release, $self); + PublicInbox::OnDestroy->new(\&lock_release, $self); } sub lock_acquire_fast { @@ -58,9 +58,9 @@ sub lock_release_fast { # caller must use return value sub lock_for_scope_fast { - my ($self, @single_pid) = @_; + my ($self) = @_; lock_acquire_fast($self) or return; # lock_path not set - PublicInbox::OnDestroy->new(@single_pid, \&lock_release_fast, $self); + PublicInbox::OnDestroy->new(\&lock_release_fast, $self); } 1;
khashl is an updated version of khash with less memory overhead (one bit/bucket instead of two) than the original khash and similar overall performance. Insertions are simpler (linear probing) but deletions may be slightly slower[1]. Of course, the majority of hash tables in git do not delete individual elements. Overall memory usage did not decrease much, as the hash tables and elements we store in them are big and currently dwarf the overhead of the khash internals. Only around 10 MB in allocations (not peak use) is saved when doing a no-op `git gc' of a Linux kernel object store with thousands of refs and islands. A summary of differences I've found from khash to khashl: * two 32-bit ints (instead of four) in the top-level struct * 2 heap allocations (instead of 3) for maps (though I wonder if locality suffers when probing is necessary) * 1 bit of metadata per-bucket (no tombstones for deleted elements) * 0.75 load factor. Lowered slightly from 0.77, but no FP multiply and responsible for the aforementioned struct size reduction * FNV-1A instead of x31 hash for strings * Fibonacci hashing (__kh_h2b), probably good for FNV-1A, but I'm skeptical of its usefulness for our SHA-* using cases * linear probing instead of quadratic * Wang's integer hash functions (currently unused) * optional hash value caching and ensemble APIs (currently unused) * some API differences (see below), but not enough to easily use both khash and khashl in the same compilation unit This patch was made with two additional goals to ease review: 1) minimize changes outside of khash*.h files 2) minimize and document all differences from upstream[2] khashl.h Our khashl.h differences from upstream: * favor portability constructs from our codebase: MAYBE_UNUSED over klib_unused, inline over kh_inline, and various integer types * disable packed attribute to satisfy -Werror=address-of-packed-member, AFAIK it doesn't change any of the data structures we use * port the following commits over from our old
khash.h: 9249ca26aca3 (khash: factor out kh_release_*, 2018-10-04) 2756ca4347cb (use REALLOC_ARRAY for changing the allocation size of arrays, 2014-09-16) 5632e838f8fa (khash: clarify that allocations never fail, 2021-07-03) * use our memory allocation wrappers * provide wrappers for compatibility with existing callers using the khash API. The khashl function naming convention is: ${NOUN}_${VERB} while the khash convention is: kh_${VERB}_${NOUN}. The kh_${NAME}_t typedef and naming convention are preserved via __KHASH_COMPAT macro to ease review (despite the `_t' suffix being reserved and typedefs being discouraged in the Linux kernel). * copy relevant API docs over from khash.h for identically named macros * preserve kh_begin, kh_foreach, kh_foreach_value from khash.h since khashl.h doesn't provide them * flesh out KHASHL_{SET,MAP}_INIT wrappers with *_clear, *_resize, and *_release functions [1] https://attractivechaos.wordpress.com/2019/12/28/deletion-from-hash-tables-without-tombstones/ [2] git clone https://github.com/attractivechaos/klib.git 2895a16cb55e (support an ensemble of hash tables, 2023-12-18) khashl.h API differences from khash.h which affected this change: * KHASHL_MAP_INIT and KHASHL_SET_INIT macros replace KHASH_INIT * user-supplied hash and equality functions use different names * object-store-ll.h avoided the kh_*_t convention (since I dislike typedef) and was the only place where I had to change a definition. 
Signed-off-by: Eric Wong <e@80x24.org> --- The git-specific changes are absolutely minimal :> builtin/fast-import.c | 2 +- builtin/fsmonitor--daemon.c | 4 +- delta-islands.c | 4 +- khash.h | 338 ----------------------- khashl.h | 522 ++++++++++++++++++++++++++++++++++++ object-store-ll.h | 2 +- object-store.h | 7 +- oidset.h | 2 +- pack-bitmap.h | 2 +- 9 files changed, 534 insertions(+), 349 deletions(-) delete mode 100644 khash.h create mode 100644 khashl.h diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 71a195ca22..29e50fd675 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -24,7 +24,7 @@ #include "object-store-ll.h" #include "mem-pool.h" #include "commit-reach.h" -#include "khash.h" +#include "khashl.h" #include "date.h" #define PACK_ID_BITS 16 diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 1593713f4c..1c71d96c6d 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -13,7 +13,7 @@ #include "fsmonitor--daemon.h" #include "repository.h" #include "simple-ipc.h" -#include "khash.h" +#include "khashl.h" #include "run-command.h" #include "trace.h" #include "trace2.h" @@ -650,7 +650,7 @@ static int fsmonitor_parse_client_token(const char *buf_token, return 0; } -KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal) +KHASHL_SET_INIT(KH_LOCAL, kh_str, str, const char *, kh_hash_str, kh_eq_str) static int do_handle_client(struct fsmonitor_daemon_state *state, const char *command, diff --git a/delta-islands.c b/delta-islands.c index ee2318d45a..aa35839f15 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -10,14 +10,14 @@ #include "diff.h" #include "progress.h" #include "refs.h" -#include "khash.h" +#include "khashl.h" #include "pack-bitmap.h" #include "pack-objects.h" #include "delta-islands.h" #include "oid-array.h" #include "config.h" -KHASH_INIT(str, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal) +KHASHL_MAP_INIT(KH_LOCAL, kh_str, str, const 
char *, void *, kh_hash_str, kh_eq_str) static kh_oid_map_t *island_marks; static unsigned island_counter; diff --git a/khash.h b/khash.h deleted file mode 100644 index ff88163177..0000000000 --- a/khash.h +++ /dev/null @@ -1,338 +0,0 @@ -/* The MIT License - - Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk> - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
-*/ - -#ifndef __AC_KHASH_H -#define __AC_KHASH_H - -#include "hash.h" - -#define AC_VERSION_KHASH_H "0.2.8" - -typedef uint32_t khint32_t; -typedef uint64_t khint64_t; - -typedef khint32_t khint_t; -typedef khint_t khiter_t; - -#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) -#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) -#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) -#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) -#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) -#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) -#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) - -#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4) - -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) - -static inline khint_t __ac_X31_hash_string(const char *s) -{ - khint_t h = (khint_t)*s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; - return h; -} - -#define kh_str_hash_func(key) __ac_X31_hash_string(key) -#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) - -static const double __ac_HASH_UPPER = 0.77; - -#define __KHASH_TYPE(name, khkey_t, khval_t) \ - typedef struct kh_##name { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ - khkey_t *keys; \ - khval_t *vals; \ - } kh_##name##_t; - -#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ - kh_##name##_t *kh_init_##name(void); \ - void kh_destroy_##name(kh_##name##_t *h); \ - void kh_clear_##name(kh_##name##_t *h); \ - khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ - void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ - khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ - void kh_del_##name(kh_##name##_t *h, khint_t x); - -#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - SCOPE kh_##name##_t *kh_init_##name(void) { \ - return 
(kh_##name##_t*)xcalloc(1, sizeof(kh_##name##_t)); \ - } \ - SCOPE void kh_release_##name(kh_##name##_t *h) \ - { \ - free(h->flags); \ - free((void *)h->keys); \ - free((void *)h->vals); \ - } \ - SCOPE void kh_destroy_##name(kh_##name##_t *h) \ - { \ - if (h) { \ - kh_release_##name(h); \ - free(h); \ - } \ - } \ - SCOPE void kh_clear_##name(kh_##name##_t *h) \ - { \ - if (h && h->flags) { \ - memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ - h->size = h->n_occupied = 0; \ - } \ - } \ - SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ - { \ - if (h->n_buckets) { \ - khint_t k, i, last, mask, step = 0; \ - mask = h->n_buckets - 1; \ - k = __hash_func(key); i = k & mask; \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - i = (i + (++step)) & mask; \ - if (i == last) return h->n_buckets; \ - } \ - return __ac_iseither(h->flags, i)? h->n_buckets : i; \ - } else return 0; \ - } \ - SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ - { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. 
*/ \ - khint32_t *new_flags = NULL; \ - khint_t j = 1; \ - { \ - kroundup32(new_n_buckets); \ - if (new_n_buckets < 4) new_n_buckets = 4; \ - if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ - else { /* hash table size to be changed (shrink or expand); rehash */ \ - ALLOC_ARRAY(new_flags, __ac_fsize(new_n_buckets)); \ - memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (h->n_buckets < new_n_buckets) { /* expand */ \ - REALLOC_ARRAY(h->keys, new_n_buckets); \ - if (kh_is_map) { \ - REALLOC_ARRAY(h->vals, new_n_buckets); \ - } \ - } /* otherwise shrink */ \ - } \ - } \ - if (j) { /* rehashing is needed */ \ - for (j = 0; j != h->n_buckets; ++j) { \ - if (__ac_iseither(h->flags, j) == 0) { \ - khkey_t key = h->keys[j]; \ - khval_t val; \ - khint_t new_mask; \ - new_mask = new_n_buckets - 1; \ - if (kh_is_map) val = h->vals[j]; \ - __ac_set_isdel_true(h->flags, j); \ - while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t k, i, step = 0; \ - k = __hash_func(key); \ - i = k & new_mask; \ - while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ - __ac_set_isempty_false(new_flags, i); \ - if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ - { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ - if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ - __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ - } else { /* write the element and jump out of the loop */ \ - h->keys[i] = key; \ - if (kh_is_map) h->vals[i] = val; \ - break; \ - } \ - } \ - } \ - } \ - if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ - REALLOC_ARRAY(h->keys, new_n_buckets); \ - if (kh_is_map) REALLOC_ARRAY(h->vals, new_n_buckets); \ - } \ - free(h->flags); /* free the working space */ \ - h->flags = new_flags; \ - h->n_buckets = new_n_buckets; \ - 
h->n_occupied = h->size; \ - h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ - } \ - } \ - SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ - { \ - khint_t x; \ - if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ - if (h->n_buckets > (h->size<<1)) { \ - kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \ - } else { \ - kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ - } \ - } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ - { \ - khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ - x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ - if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ - else { \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - if (__ac_isdel(h->flags, i)) site = i; \ - i = (i + (++step)) & mask; \ - if (i == last) { x = site; break; } \ - } \ - if (x == h->n_buckets) { \ - if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ - else x = i; \ - } \ - } \ - } \ - if (__ac_isempty(h->flags, x)) { /* not present at all */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; ++h->n_occupied; \ - *ret = 1; \ - } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; \ - *ret = 2; \ - } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ - return x; \ - } \ - SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ - { \ - if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ - __ac_set_isdel_true(h->flags, x); \ - --h->size; \ - } \ - } - -#define KHASH_DECLARE(name, khkey_t, khval_t) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_PROTOTYPES(name, khkey_t, khval_t) - -#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - __KHASH_TYPE(name, 
khkey_t, khval_t) \ - __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) - -#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - KHASH_INIT2(name, MAYBE_UNUSED static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) - -/* Other convenient macros... */ - -/*! @function - @abstract Test whether a bucket contains data. - @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return 1 if containing data; 0 otherwise [int] - */ -#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) - -/*! @function - @abstract Get key given an iterator - @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return Key [type of keys] - */ -#define kh_key(h, x) ((h)->keys[x]) - -/*! @function - @abstract Get value given an iterator - @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return Value [type of values] - @discussion For hash sets, calling this results in segfault. - */ -#define kh_val(h, x) ((h)->vals[x]) - -/*! @function - @abstract Alias of kh_val() - */ -#define kh_value(h, x) ((h)->vals[x]) - -/*! @function - @abstract Get the start iterator - @param h Pointer to the hash table [khash_t(name)*] - @return The start iterator [khint_t] - */ -#define kh_begin(h) (khint_t)(0) - -/*! @function - @abstract Get the end iterator - @param h Pointer to the hash table [khash_t(name)*] - @return The end iterator [khint_t] - */ -#define kh_end(h) ((h)->n_buckets) - -/*! @function - @abstract Get the number of elements in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @return Number of elements in the hash table [khint_t] - */ -#define kh_size(h) ((h)->size) - -/*! 
@function - @abstract Get the number of buckets in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @return Number of buckets in the hash table [khint_t] - */ -#define kh_n_buckets(h) ((h)->n_buckets) - -/*! @function - @abstract Iterate over the entries in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @param kvar Variable to which key will be assigned - @param vvar Variable to which value will be assigned - @param code Block of code to execute - */ -#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (kvar) = kh_key(h,__i); \ - (vvar) = kh_val(h,__i); \ - code; \ - } } - -/*! @function - @abstract Iterate over the values in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @param vvar Variable to which value will be assigned - @param code Block of code to execute - */ -#define kh_foreach_value(h, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (vvar) = kh_val(h,__i); \ - code; \ - } } - -static inline unsigned int oidhash_by_value(struct object_id oid) -{ - return oidhash(&oid); -} - -static inline int oideq_by_value(struct object_id a, struct object_id b) -{ - return oideq(&a, &b); -} - -KHASH_INIT(oid_set, struct object_id, int, 0, oidhash_by_value, oideq_by_value) - -KHASH_INIT(oid_map, struct object_id, void *, 1, oidhash_by_value, oideq_by_value) - -KHASH_INIT(oid_pos, struct object_id, int, 1, oidhash_by_value, oideq_by_value) - -#endif /* __AC_KHASH_H */ diff --git a/khashl.h b/khashl.h new file mode 100644 index 0000000000..3660fd2ce4 --- /dev/null +++ b/khashl.h @@ -0,0 +1,522 @@ +/* The MIT License + + Copyright (c) 2019-2023 by Attractive Chaos <attractor@live.co.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + 
"Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef __AC_KHASHL_H +#define __AC_KHASHL_H + +#include "hash.h" + +#define AC_VERSION_KHASHL_H "0.2" + +typedef uint32_t khint32_t; +typedef uint64_t khint64_t; + +typedef khint32_t khint_t; +typedef khint_t khiter_t; + +#define kh_inline inline /* portably handled elsewhere */ +#define KH_LOCAL static kh_inline MAYBE_UNUSED + +#ifndef kcalloc +#define kcalloc(N,Z) xcalloc(N,Z) +#endif +#ifndef kfree +#define kfree(P) free(P) +#endif + +/**************************** + * Simple private functions * + ****************************/ + +#define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U) +#define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU)) +#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU))) + +#define __kh_fsize(m) ((m) < 32? 
1 : (m)>>5) + +static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } + +/******************* + * Hash table base * + *******************/ + +#define __KHASHL_TYPE(HType, khkey_t) \ + typedef struct HType { \ + khint_t bits, count; \ + khint32_t *used; \ + khkey_t *keys; \ + } HType; + +#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ + extern HType *prefix##_init(void); \ + extern void prefix##_destroy(HType *h); \ + extern void prefix##_clear(HType *h); \ + extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \ + extern void prefix##_resize(HType *h, khint_t new_n_buckets); \ + extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \ + extern void prefix##_del(HType *h, khint_t k); + +#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + SCOPE HType *prefix##_init(void) { \ + return (HType*)kcalloc(1, sizeof(HType)); \ + } \ + SCOPE void prefix##_release(HType *h) { \ + kfree((void *)h->keys); kfree(h->used); \ + } \ + SCOPE void prefix##_destroy(HType *h) { \ + if (!h) return; \ + prefix##_release(h); \ + kfree(h); \ + } \ + SCOPE void prefix##_clear(HType *h) { \ + if (h && h->used) { \ + khint_t n_buckets = (khint_t)1U << h->bits; \ + memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ + h->count = 0; \ + } \ + } + +#define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_getp_core(const HType *h, const khkey_t *key, khint_t hash) { \ + khint_t i, last, n_buckets, mask; \ + if (h->keys == 0) return 0; \ + n_buckets = (khint_t)1U << h->bits; \ + mask = n_buckets - 1U; \ + i = last = __kh_h2b(hash, h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) return n_buckets; \ + } \ + return !__kh_used(h->used, i)? 
n_buckets : i; \ + } \ + SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { return prefix##_getp_core(h, key, __hash_fn(*key)); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp_core(h, &key, __hash_fn(key)); } + +#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { \ + khint32_t *new_used = 0; \ + khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ + while ((x >>= 1) != 0) ++j; \ + if (new_n_buckets & (new_n_buckets - 1)) ++j; \ + new_bits = j > 2? j : 2; \ + new_n_buckets = (khint_t)1U << new_bits; \ + if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return; /* noop, requested size is too small */ \ + new_used = (khint32_t*)kcalloc(__kh_fsize(new_n_buckets), sizeof(khint32_t)); \ + n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \ + if (n_buckets < new_n_buckets) { /* expand */ \ + REALLOC_ARRAY(h->keys, new_n_buckets); \ + } /* otherwise shrink */ \ + new_mask = new_n_buckets - 1; \ + for (j = 0; j != n_buckets; ++j) { \ + khkey_t key; \ + if (!__kh_used(h->used, j)) continue; \ + key = h->keys[j]; \ + __kh_set_unused(h->used, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t i; \ + i = __kh_h2b(__hash_fn(key), new_bits); \ + while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \ + __kh_set_used(new_used, i); \ + if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + break; \ + } \ + } \ + } \ + if (n_buckets > new_n_buckets) /* shrink the hash table */ \ + REALLOC_ARRAY(h->keys, new_n_buckets); \ + kfree(h->used); /* free the working space */ \ + h->used = new_used, h->bits = new_bits; \ + } + +#define 
__KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_putp_core(HType *h, const khkey_t *key, khint_t hash, int *absent) { \ + khint_t n_buckets, i, last, mask; \ + n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \ + *absent = -1; \ + if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \ + prefix##_resize(h, n_buckets + 1U); \ + n_buckets = (khint_t)1U<<h->bits; \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + mask = n_buckets - 1; \ + i = last = __kh_h2b(hash, h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) break; \ + } \ + if (!__kh_used(h->used, i)) { /* not present at all */ \ + h->keys[i] = *key; \ + __kh_set_used(h->used, i); \ + ++h->count; \ + *absent = 1; \ + } else *absent = 0; /* Don't touch h->keys[i] if present */ \ + return i; \ + } \ + SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { return prefix##_putp_core(h, key, __hash_fn(*key), absent); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp_core(h, &key, __hash_fn(key), absent); } + +#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ + SCOPE int prefix##_del(HType *h, khint_t i) { \ + khint_t j = i, k, mask, n_buckets; \ + if (h->keys == 0) return 0; \ + n_buckets = (khint_t)1U<<h->bits; \ + mask = n_buckets - 1U; \ + while (1) { \ + j = (j + 1U) & mask; \ + if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \ + k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ + if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ + h->keys[i] = h->keys[j], i = j; \ + } \ + __kh_set_unused(h->used, i); \ + --h->count; \ + return 1; \ + } + +#define KHASHL_DECLARE(HType, prefix, khkey_t) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_PROTOTYPES(HType, prefix, khkey_t) + +/* compatibility wrappers to 
make khash -> khashl migration easier */ +#define __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) \ + typedef HType HType##_t; \ + SCOPE HType *kh_init_##prefix(void) { return prefix##_init(); } \ + SCOPE void kh_release_##prefix(HType *h) { prefix##_release(h); } \ + SCOPE void kh_destroy_##prefix(HType *h) { prefix##_destroy(h); } \ + SCOPE void kh_clear_##prefix(HType *h) { prefix##_clear(h); } \ + SCOPE khint_t kh_get_##prefix(const HType *h, khkey_t key) { \ + return prefix##_get(h, key); \ + } \ + SCOPE void kh_resize_##prefix(HType *h, khint_t new_n_buckets) { \ + prefix##_resize(h, new_n_buckets); \ + } \ + SCOPE khint_t kh_put_##prefix(HType *h, khkey_t key, int *absent) { \ + return prefix##_put(h, key, absent); \ + } \ + SCOPE int kh_del_##prefix(HType *h, khint_t i) { \ + return prefix##_del(h, i); \ + } + +#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) + +/*************************** + * Ensemble of hash tables * + ***************************/ + +typedef struct { + khint_t sub, pos; +} kh_ensitr_t; + +#define KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + KHASHL_INIT(KH_LOCAL, HType##_sub, prefix##_sub, khkey_t, __hash_fn, __hash_eq) \ + typedef struct HType { \ + khint64_t count:54, bits:8; \ + HType##_sub *sub; \ + } HType; \ + SCOPE HType *prefix##_init(int bits) { \ + HType *g; \ + g = (HType*)kcalloc(1, sizeof(*g)); \ + g->bits = bits; \ + g->sub = (HType##_sub*)kcalloc(1U<<bits, sizeof(*g->sub)); \ + return g; \ + } \ + SCOPE void prefix##_destroy(HType *g) { \ + int t; \ + if (!g) return; \ + for (t = 0; t < 1<<g->bits; ++t) { 
kfree((void*)g->sub[t].keys); kfree(g->sub[t].used); } \ + kfree(g->sub); kfree(g); \ + } \ + SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \ + khint_t hash, low, ret; \ + kh_ensitr_t r; \ + HType##_sub *h; \ + hash = __hash_fn(*key); \ + low = hash & ((1U<<g->bits) - 1); \ + h = &g->sub[low]; \ + ret = prefix##_sub_getp_core(h, key, hash); \ + if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \ + else r.sub = low, r.pos = ret; \ + return r; \ + } \ + SCOPE kh_ensitr_t prefix##_get(const HType *g, const khkey_t key) { return prefix##_getp(g, &key); } \ + SCOPE kh_ensitr_t prefix##_putp(HType *g, const khkey_t *key, int *absent) { \ + khint_t hash, low, ret; \ + kh_ensitr_t r; \ + HType##_sub *h; \ + hash = __hash_fn(*key); \ + low = hash & ((1U<<g->bits) - 1); \ + h = &g->sub[low]; \ + ret = prefix##_sub_putp_core(h, key, hash, absent); \ + if (*absent) ++g->count; \ + if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \ + else r.sub = low, r.pos = ret; \ + return r; \ + } \ + SCOPE kh_ensitr_t prefix##_put(HType *g, const khkey_t key, int *absent) { return prefix##_putp(g, &key, absent); } \ + SCOPE int prefix##_del(HType *g, kh_ensitr_t itr) { \ + HType##_sub *h = &g->sub[itr.sub]; \ + int ret; \ + ret = prefix##_sub_del(h, itr.pos); \ + if (ret) --g->count; \ + return ret; \ + } + +/***************************** + * More convenient interface * + *****************************/ + +#define __kh_packed /* noop, we use -Werror=address-of-packed-member */ +#define __kh_cached_hash(x) ((x).hash) + +#define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \ + static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, 
prefix##_s_hash, prefix##_s_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \ + SCOPE void prefix##_release(HType *h) { prefix##_s_release(h); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \ + SCOPE void prefix##_clear(HType *h) { prefix##_s_clear(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } \ + __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + +#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ + SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_m_resize(h, new_n_buckets); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return 
prefix##_m_putp(h, &t, absent); } \ + __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + +#define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \ + static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); } + +#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \ + static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return 
prefix##_cm_putp(h, &t, absent); } + +#define KHASHE_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHE_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ + SCOPE HType *prefix##_init(int bits) { return prefix##_m_init(bits); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_m_del(h, k); } \ + SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } + +/************************** + * Public macro functions * + **************************/ + +#define kh_bucket(h, x) ((h)->keys[x]) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->count) + +#define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table + @return The end iterator [khint_t] + */ +#define kh_end(h) kh_capacity(h) + +/*! @function + @abstract Get key given an iterator + @param h Pointer to the hash table + @param x Iterator to the bucket [khint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x].key) + +/*! 
@function + @abstract Get value given an iterator + @param h Pointer to the hash table + @param x Iterator to the bucket [khint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->keys[x].val) + +/*! @function + @abstract Alias of kh_val() + */ +#define kh_value(h, x) kh_val(h, x) + +/*! @function + @abstract Test whether a bucket contains data. + @param h Pointer to the hash table + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) __kh_used((h)->used, (x)) + +#define kh_ens_key(g, x) kh_key(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_val(g, x) kh_val(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_exist(g, x) kh_exist(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_is_end(x) ((x).pos == (khint_t)-1) +#define kh_ens_size(g) ((g)->count) + +/************************************** + * Common hash and equality functions * + **************************************/ + +#define kh_eq_generic(a, b) ((a) == (b)) +#define kh_eq_str(a, b) (strcmp((a), (b)) == 0) +#define kh_hash_dummy(x) ((khint_t)(x)) + +static kh_inline khint_t kh_hash_uint32(khint_t key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} + +static kh_inline khint_t kh_hash_uint64(khint64_t key) { + key = ~key + (key << 21); + key = key ^ key >> 24; + key = (key + (key << 3)) + (key << 8); + key = key ^ key >> 14; + key = (key + (key << 2)) + (key << 4); + key = key ^ key >> 28; + key = key + (key << 31); + return (khint_t)key; +} + +#define KH_FNV_SEED 11 + +static kh_inline khint_t kh_hash_str(const char *s) { /* FNV1a */ + khint_t h = KH_FNV_SEED ^ 2166136261U; + const unsigned char *t = (const unsigned char*)s; + for (; *t; ++t) + h ^= *t, h *= 16777619; + return h; +} + +static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) { + khint_t h = 
KH_FNV_SEED ^ 2166136261U; + int i; + for (i = 0; i < len; ++i) + h ^= s[i], h *= 16777619; + return h; +} + +/*! @function + @abstract Get the start iterator + @param h Pointer to the hash table + @return The start iterator [khint_t] + */ +#define kh_begin(h) (khint_t)(0) + +/*! @function + @abstract Iterate over the entries in the hash table + @param h Pointer to the hash table + @param kvar Variable to which key will be assigned + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (kvar) = kh_key(h,__i); \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/*! @function + @abstract Iterate over the values in the hash table + @param h Pointer to the hash table + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach_value(h, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +static inline unsigned int oidhash_by_value(struct object_id oid) +{ + return oidhash(&oid); +} + +static inline int oideq_by_value(struct object_id a, struct object_id b) +{ + return oideq(&a, &b); +} + +KHASHL_SET_INIT(KH_LOCAL, kh_oid_set, oid_set, struct object_id, + oidhash_by_value, oideq_by_value) + +KHASHL_MAP_INIT(KH_LOCAL, kh_oid_map, oid_map, struct object_id, void *, + oidhash_by_value, oideq_by_value) + +KHASHL_MAP_INIT(KH_LOCAL, kh_oid_pos, oid_pos, struct object_id, int, + oidhash_by_value, oideq_by_value) + +#endif /* __AC_KHASHL_H */ diff --git a/object-store-ll.h b/object-store-ll.h index 26a3895c82..401c4beff5 100644 --- a/object-store-ll.h +++ b/object-store-ll.h @@ -160,7 +160,7 @@ struct raw_object_store { */ struct object_directory *odb; struct object_directory **odb_tail; - struct kh_odb_path_map 
*odb_by_path; + struct odb_path_map *odb_by_path; int loaded_alternates; diff --git a/object-store.h b/object-store.h index 1b3e3d7d01..3db4802e86 100644 --- a/object-store.h +++ b/object-store.h @@ -1,11 +1,12 @@ #ifndef OBJECT_STORE_H #define OBJECT_STORE_H -#include "khash.h" +#include "khashl.h" #include "dir.h" #include "object-store-ll.h" -KHASH_INIT(odb_path_map, const char * /* key: odb_path */, - struct object_directory *, 1, fspathhash, fspatheq) +KHASHL_MAP_INIT(KH_LOCAL, odb_path_map, odb_path_map, + const char * /* key: odb_path */, struct object_directory *, + fspathhash, fspatheq) #endif /* OBJECT_STORE_H */ diff --git a/oidset.h b/oidset.h index 262f4256d6..17af1b6708 100644 --- a/oidset.h +++ b/oidset.h @@ -1,7 +1,7 @@ #ifndef OIDSET_H #define OIDSET_H -#include "khash.h" +#include "khashl.h" /** * This API is similar to oid-array, in that it maintains a set of object ids diff --git a/pack-bitmap.h b/pack-bitmap.h index c7dea13217..d018365f24 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -2,7 +2,7 @@ #define PACK_BITMAP_H #include "ewah/ewok.h" -#include "khash.h" +#include "khashl.h" #include "pack.h" #include "pack-objects.h" #include "string-list.h"
Using an ensemble of hash tables to implement a larger one can reduce the temporary space required to resize a hash table. However, I haven't been able to measure an improvement using glibc memusage(1), yet. I could be tuning it wrong (too few or too many sub hash tables), and it may not be useful with glibc malloc since large realloc(3) are optimized with mremap(2) to provide in-place growth. --- builtin/fast-import.c | 10 ++++---- builtin/replay.c | 10 ++++---- delta-islands.c | 54 +++++++++++++++++++------------------- fsck.c | 10 ++++---- khashl.h | 60 ++++++++++++++++++++++++++++++++----------- pack-bitmap-write.c | 4 +-- pack-bitmap.c | 32 +++++++++++------------ 7 files changed, 105 insertions(+), 75 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 29e50fd675..190e136e2e 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2198,7 +2198,7 @@ static uintmax_t change_note_fanout(struct tree_entry *root, static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const char **end) { int algo; - khiter_t it; + kh_ensitr_t it; /* Make SHA-1 object IDs have all-zero padding. */ memset(oid->hash, 0, sizeof(oid->hash)); @@ -2209,13 +2209,13 @@ static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const ch it = kh_get_oid_map(sub_oid_map, *oid); /* No such object? */ - if (it == kh_end(sub_oid_map)) { + if (kh_ens_is_end(it)) { /* If we're using the same algorithm, pass it through. */ if (hash_algos[algo].format_id == the_hash_algo->format_id) return 0; return -1; } - oidcpy(oid, kh_value(sub_oid_map, it)); + oidcpy(oid, kh_ens_val(sub_oid_map, it)); return 0; } @@ -3083,13 +3083,13 @@ static void insert_mapped_mark(uintmax_t mark, void *object, void *cbp) struct object_id *fromoid = object; struct object_id *tooid = find_mark(cbp, mark); int ret; - khiter_t it; + kh_ensitr_t it; it = kh_put_oid_map(sub_oid_map, *fromoid, &ret); /* We've already seen this object. 
*/ if (ret == 0) return; - kh_value(sub_oid_map, it) = tooid; + kh_ens_val(sub_oid_map, it) = tooid; } static void build_mark_map_one(struct mark_set *from, struct mark_set *to) diff --git a/builtin/replay.c b/builtin/replay.c index 6bc4b47f09..e084da8a94 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -227,10 +227,10 @@ static struct commit *mapped_commit(kh_oid_map_t *replayed_commits, struct commit *commit, struct commit *fallback) { - khint_t pos = kh_get_oid_map(replayed_commits, commit->object.oid); - if (pos == kh_end(replayed_commits)) + kh_ensitr_t pos = kh_get_oid_map(replayed_commits, commit->object.oid); + if (kh_ens_is_end(pos)) return fallback; - return kh_value(replayed_commits, pos); + return kh_ens_val(replayed_commits, pos); } static struct commit *pick_regular_commit(struct commit *pickme, @@ -381,7 +381,7 @@ int cmd_replay(int argc, const char **argv, const char *prefix) replayed_commits = kh_init_oid_map(); while ((commit = get_revision(&revs))) { const struct name_decoration *decoration; - khint_t pos; + kh_ensitr_t pos; int hr; if (!commit->parents) @@ -399,7 +399,7 @@ int cmd_replay(int argc, const char **argv, const char *prefix) if (hr == 0) BUG("Duplicate rewritten commit: %s\n", oid_to_hex(&commit->object.oid)); - kh_value(replayed_commits, pos) = last_commit; + kh_ens_val(replayed_commits, pos) = last_commit; /* Update any necessary branches */ if (advance_name) diff --git a/delta-islands.c b/delta-islands.c index aa35839f15..de159e98a8 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -90,7 +90,7 @@ static int island_bitmap_get(struct island_bitmap *self, uint32_t i) int in_same_island(const struct object_id *trg_oid, const struct object_id *src_oid) { - khiter_t trg_pos, src_pos; + kh_ensitr_t trg_pos, src_pos; /* If we aren't using islands, assume everything goes together. 
*/ if (!island_marks) @@ -101,7 +101,7 @@ int in_same_island(const struct object_id *trg_oid, const struct object_id *src_ * against anything -- it's not an important object */ trg_pos = kh_get_oid_map(island_marks, *trg_oid); - if (trg_pos >= kh_end(island_marks)) + if (kh_ens_is_end(trg_pos)) return 1; /* @@ -109,28 +109,28 @@ int in_same_island(const struct object_id *trg_oid, const struct object_id *src_ * we don't want to base any deltas on it! */ src_pos = kh_get_oid_map(island_marks, *src_oid); - if (src_pos >= kh_end(island_marks)) + if (kh_ens_is_end(src_pos)) return 0; - return island_bitmap_is_subset(kh_value(island_marks, trg_pos), - kh_value(island_marks, src_pos)); + return island_bitmap_is_subset(kh_ens_val(island_marks, trg_pos), + kh_ens_val(island_marks, src_pos)); } int island_delta_cmp(const struct object_id *a, const struct object_id *b) { - khiter_t a_pos, b_pos; + kh_ensitr_t a_pos, b_pos; struct island_bitmap *a_bitmap = NULL, *b_bitmap = NULL; if (!island_marks) return 0; a_pos = kh_get_oid_map(island_marks, *a); - if (a_pos < kh_end(island_marks)) - a_bitmap = kh_value(island_marks, a_pos); + if (!kh_ens_is_end(a_pos)) + a_bitmap = kh_ens_val(island_marks, a_pos); b_pos = kh_get_oid_map(island_marks, *b); - if (b_pos < kh_end(island_marks)) - b_bitmap = kh_value(island_marks, b_pos); + if (!kh_ens_is_end(b_pos)) + b_bitmap = kh_ens_val(island_marks, b_pos); if (a_bitmap) { if (!b_bitmap || !island_bitmap_is_subset(a_bitmap, b_bitmap)) @@ -146,20 +146,20 @@ int island_delta_cmp(const struct object_id *a, const struct object_id *b) static struct island_bitmap *create_or_get_island_marks(struct object *obj) { - khiter_t pos; + kh_ensitr_t pos; int hash_ret; pos = kh_put_oid_map(island_marks, obj->oid, &hash_ret); if (hash_ret) - kh_value(island_marks, pos) = island_bitmap_new(NULL); + kh_ens_val(island_marks, pos) = island_bitmap_new(NULL); - return kh_value(island_marks, pos); + return kh_ens_val(island_marks, pos); } static void 
set_island_marks(struct object *obj, struct island_bitmap *marks) { struct island_bitmap *b; - khiter_t pos; + kh_ensitr_t pos; int hash_ret; pos = kh_put_oid_map(island_marks, obj->oid, &hash_ret); @@ -169,7 +169,7 @@ static void set_island_marks(struct object *obj, struct island_bitmap *marks) * parent. */ marks->refcount++; - kh_value(island_marks, pos) = marks; + kh_ens_val(island_marks, pos) = marks; return; } @@ -177,10 +177,10 @@ static void set_island_marks(struct object *obj, struct island_bitmap *marks) * We do have it. Make sure we split any copy-on-write before * updating. */ - b = kh_value(island_marks, pos); + b = kh_ens_val(island_marks, pos); if (b->refcount > 1) { b->refcount--; - b = kh_value(island_marks, pos) = island_bitmap_new(b); + b = kh_ens_val(island_marks, pos) = island_bitmap_new(b); } island_bitmap_or(b, marks); } @@ -272,13 +272,13 @@ void resolve_tree_islands(struct repository *r, struct tree *tree; struct tree_desc desc; struct name_entry entry; - khiter_t pos; + kh_ensitr_t pos; pos = kh_get_oid_map(island_marks, ent->idx.oid); - if (pos >= kh_end(island_marks)) + if (kh_ens_is_end(pos)) continue; - root_marks = kh_value(island_marks, pos); + root_marks = kh_ens_val(island_marks, pos); tree = lookup_tree(r, &ent->idx.oid); if (!tree || parse_tree(tree) < 0) @@ -499,11 +499,11 @@ void load_delta_islands(struct repository *r, int progress) void propagate_island_marks(struct commit *commit) { - khiter_t pos = kh_get_oid_map(island_marks, commit->object.oid); + kh_ensitr_t pos = kh_get_oid_map(island_marks, commit->object.oid); - if (pos < kh_end(island_marks)) { + if (!kh_ens_is_end(pos)) { struct commit_list *p; - struct island_bitmap *root_marks = kh_value(island_marks, pos); + struct island_bitmap *root_marks = kh_ens_val(island_marks, pos); repo_parse_commit(the_repository, commit); set_island_marks(&repo_get_commit_tree(the_repository, commit)->object, @@ -518,7 +518,7 @@ void free_island_marks(void) struct island_bitmap *bitmap; 
if (island_marks) { - kh_foreach_value(island_marks, bitmap, { + kh_ens_foreach_value(island_marks, bitmap, { if (!--bitmap->refcount) free(bitmap); }); @@ -538,12 +538,12 @@ int compute_pack_layers(struct packing_data *to_pack) for (i = 0; i < to_pack->nr_objects; ++i) { struct object_entry *entry = &to_pack->objects[i]; - khiter_t pos = kh_get_oid_map(island_marks, entry->idx.oid); + kh_ensitr_t pos = kh_get_oid_map(island_marks, entry->idx.oid); oe_set_layer(to_pack, entry, 1); - if (pos < kh_end(island_marks)) { - struct island_bitmap *bitmap = kh_value(island_marks, pos); + if (!kh_ens_is_end(pos)) { + struct island_bitmap *bitmap = kh_ens_val(island_marks, pos); if (island_bitmap_get(bitmap, island_counter_core)) oe_set_layer(to_pack, entry, 0); diff --git a/fsck.c b/fsck.c index 8ded0a473a..4c67e1e64c 100644 --- a/fsck.c +++ b/fsck.c @@ -266,13 +266,13 @@ void fsck_enable_object_names(struct fsck_options *options) const char *fsck_get_object_name(struct fsck_options *options, const struct object_id *oid) { - khiter_t pos; + kh_ensitr_t pos; if (!options->object_names) return NULL; pos = kh_get_oid_map(options->object_names, *oid); - if (pos >= kh_end(options->object_names)) + if (kh_ens_is_end(pos)) return NULL; - return kh_value(options->object_names, pos); + return kh_ens_val(options->object_names, pos); } void fsck_put_object_name(struct fsck_options *options, @@ -281,7 +281,7 @@ void fsck_put_object_name(struct fsck_options *options, { va_list ap; struct strbuf buf = STRBUF_INIT; - khiter_t pos; + kh_ensitr_t pos; int hashret; if (!options->object_names) @@ -292,7 +292,7 @@ void fsck_put_object_name(struct fsck_options *options, return; va_start(ap, fmt); strbuf_vaddf(&buf, fmt, ap); - kh_value(options->object_names, pos) = strbuf_detach(&buf, NULL); + kh_ens_val(options->object_names, pos) = strbuf_detach(&buf, NULL); va_end(ap); } diff --git a/khashl.h b/khashl.h index 8ffe80fbb2..e950593d61 100644 --- a/khashl.h +++ b/khashl.h @@ -203,22 +203,22 @@ 
static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 26 __KHASHL_PROTOTYPES(HType, prefix, khkey_t) /* compatibility wrappers to make khash -> khashl migration easier */ -#define __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) \ +#define __KHASH_COMPAT(SCOPE, kh_idx, HType, prefix, khkey_t) \ typedef HType HType##_t; \ SCOPE HType *kh_init_##prefix(void) { return prefix##_init(); } \ SCOPE void kh_release_##prefix(HType *h) { prefix##_release(h); } \ SCOPE void kh_destroy_##prefix(HType *h) { prefix##_destroy(h); } \ SCOPE void kh_clear_##prefix(HType *h) { prefix##_clear(h); } \ - SCOPE khint_t kh_get_##prefix(const HType *h, khkey_t key) { \ + SCOPE kh_idx kh_get_##prefix(const HType *h, khkey_t key) { \ return prefix##_get(h, key); \ } \ SCOPE void kh_resize_##prefix(HType *h, khint_t new_n_buckets) { \ prefix##_resize(h, new_n_buckets); \ } \ - SCOPE khint_t kh_put_##prefix(HType *h, khkey_t key, int *absent) { \ + SCOPE kh_idx kh_put_##prefix(HType *h, khkey_t key, int *absent) { \ return prefix##_put(h, key, absent); \ } \ - SCOPE int kh_del_##prefix(HType *h, khint_t i) { \ + SCOPE int kh_del_##prefix(HType *h, kh_idx i) { \ return prefix##_del(h, i); \ } @@ -244,18 +244,32 @@ typedef struct { khint64_t count:54, bits:8; \ HType##_sub *sub; \ } HType; \ - SCOPE HType *prefix##_init(int bits) { \ - HType *g; \ - g = (HType*)kcalloc(1, sizeof(*g)); \ + SCOPE HType *prefix##_init_bits(HType *g, size_t bits) { \ g->bits = bits; \ g->sub = (HType##_sub*)kcalloc(1U<<bits, sizeof(*g->sub)); \ return g; \ } \ + SCOPE HType *prefix##_init(void) { \ + HType *g; \ + g = (HType*)kcalloc(1, sizeof(*g)); \ + return prefix##_init_bits(g, 6); /* unsure about default */ \ + } \ + SCOPE void prefix##_release(HType *g) { \ + int t; \ + for (t = 0; t < 1<<g->bits; ++t) \ + prefix##_sub_release(&g->sub[t]); \ + kfree(g->sub); \ + } \ SCOPE void prefix##_destroy(HType *g) { \ + if (!g) return; \ + prefix##_release(g); \ + kfree(g); \ + } \ + SCOPE void 
prefix##_clear(HType *g) { \ int t; \ if (!g) return; \ - for (t = 0; t < 1<<g->bits; ++t) { kfree((void*)g->sub[t].keys); kfree(g->sub[t].used); } \ - kfree(g->sub); kfree(g); \ + for (t = 0; t < 1<<g->bits; ++t) \ + prefix##_sub_clear(&g->sub[t]); \ } \ SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \ khint_t hash, low, ret; \ @@ -312,7 +326,7 @@ typedef struct { SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } \ - __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + __KHASH_COMPAT(SCOPE, khint_t, HType, prefix, khkey_t) #define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ @@ -327,7 +341,7 @@ typedef struct { SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \ - __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + __KHASH_COMPAT(SCOPE, khint_t, HType, prefix, khkey_t) #define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \ @@ -354,11 +368,15 @@ typedef struct { static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ KHASHE_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, 
prefix##_m_eq) \ - SCOPE HType *prefix##_init(int bits) { return prefix##_m_init(bits); } \ + SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ + SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \ SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t ignore) { /* noop */ } \ SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_m_del(h, k); } \ - SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } + SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \ + __KHASH_COMPAT(SCOPE, kh_ensitr_t, HType, prefix, khkey_t) /************************** * Public macro functions * @@ -487,6 +505,18 @@ static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) { code; \ } } +#define kh_ens_foreach(g, kvar, vvar, code) do { \ + size_t t; \ + for (t = 0; t < 1<<g->bits; ++t) \ + kh_foreach(&g->sub[t], kvar, vvar, code); \ +} while (0) + +#define kh_ens_foreach_value(g, vvar, code) do { \ + size_t t; \ + for (t = 0; t < 1<<g->bits; ++t) \ + kh_foreach_value(&g->sub[t], vvar, code); \ +} while (0) + /*! 
@function @abstract Iterate over the values in the hash table @param h Pointer to the hash table @@ -513,10 +543,10 @@ static inline int oideq_by_value(struct object_id a, struct object_id b) KHASHL_SET_INIT(KH_LOCAL, kh_oid_set, oid_set, struct object_id, oidhash_by_value, oideq_by_value) -KHASHL_MAP_INIT(KH_LOCAL, kh_oid_map, oid_map, struct object_id, void *, +KHASHE_MAP_INIT(KH_LOCAL, kh_oid_map, oid_map, struct object_id, void *, oidhash_by_value, oideq_by_value) -KHASHL_MAP_INIT(KH_LOCAL, kh_oid_pos, oid_pos, struct object_id, int, +KHASHE_MAP_INIT(KH_LOCAL, kh_oid_pos, oid_pos, struct object_id, int, oidhash_by_value, oideq_by_value) #endif /* __AC_KHASHL_H */ diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 990a9498d7..bbf2090c46 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -465,7 +465,7 @@ static int fill_bitmap_commit(struct bb_commit *ent, static void store_selected(struct bb_commit *ent, struct commit *commit) { struct bitmapped_commit *stored = &writer.selected[ent->idx]; - khiter_t hash_pos; + kh_ensitr_t hash_pos; int hash_ret; stored->bitmap = bitmap_to_ewah(ent->bitmap); @@ -474,7 +474,7 @@ static void store_selected(struct bb_commit *ent, struct commit *commit) if (hash_ret == 0) die("Duplicate entry when writing index: %s", oid_to_hex(&commit->object.oid)); - kh_value(writer.bitmaps, hash_pos) = stored; + kh_ens_val(writer.bitmaps, hash_pos) = stored; } int bitmap_writer_build(struct packing_data *to_pack) diff --git a/pack-bitmap.c b/pack-bitmap.c index 2baeabacee..68cd893dee 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -214,7 +214,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, int flags) { struct stored_bitmap *stored; - khiter_t hash_pos; + kh_ensitr_t hash_pos; int ret; stored = xmalloc(sizeof(struct stored_bitmap)); @@ -235,7 +235,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, return NULL; } - kh_value(index->bitmaps, hash_pos) = stored; + 
kh_ens_val(index->bitmaps, hash_pos) = stored; return stored; } @@ -721,7 +721,7 @@ static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_ static size_t xor_items_nr = 0, xor_items_alloc = 0; static int is_corrupt = 0; int xor_flags; - khiter_t hash_pos; + kh_ensitr_t hash_pos; struct bitmap_lookup_table_xor_item *xor_item; if (is_corrupt) @@ -766,8 +766,8 @@ static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_ * has already been stored. So, assign this stored bitmap * to the xor_bitmap. */ - if (hash_pos < kh_end(bitmap_git->bitmaps) && - (xor_bitmap = kh_value(bitmap_git->bitmaps, hash_pos))) + if (!kh_ens_is_end(hash_pos) && + (xor_bitmap = kh_ens_val(bitmap_git->bitmaps, hash_pos))) break; xor_items_nr++; xor_row = triplet.xor_row; @@ -841,9 +841,9 @@ static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_ struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, struct commit *commit) { - khiter_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps, + kh_ensitr_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps, commit->object.oid); - if (hash_pos >= kh_end(bitmap_git->bitmaps)) { + if (kh_ens_is_end(hash_pos)) { struct stored_bitmap *bitmap = NULL; if (!bitmap_git->table_lookup) return NULL; @@ -855,17 +855,17 @@ struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, return NULL; return lookup_stored_bitmap(bitmap); } - return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos)); + return lookup_stored_bitmap(kh_ens_val(bitmap_git->bitmaps, hash_pos)); } static inline int bitmap_position_extended(struct bitmap_index *bitmap_git, const struct object_id *oid) { kh_oid_pos_t *positions = bitmap_git->ext_index.positions; - khiter_t pos = kh_get_oid_pos(positions, *oid); + kh_ensitr_t pos = kh_get_oid_pos(positions, *oid); - if (pos < kh_end(positions)) { - int bitmap_pos = kh_value(positions, pos); + if (!kh_ens_is_end(pos)) { + int bitmap_pos = 
kh_ens_val(positions, pos); return bitmap_pos + bitmap_num_objects(bitmap_git); } @@ -913,7 +913,7 @@ static int ext_index_add_object(struct bitmap_index *bitmap_git, { struct eindex *eindex = &bitmap_git->ext_index; - khiter_t hash_pos; + kh_ensitr_t hash_pos; int hash_ret; int bitmap_pos; @@ -928,10 +928,10 @@ static int ext_index_add_object(struct bitmap_index *bitmap_git, bitmap_pos = eindex->count; eindex->objects[eindex->count] = object; eindex->hashes[eindex->count] = pack_name_hash(name); - kh_value(eindex->positions, hash_pos) = bitmap_pos; + kh_ens_val(eindex->positions, hash_pos) = bitmap_pos; eindex->count++; } else { - bitmap_pos = kh_value(eindex->positions, hash_pos); + bitmap_pos = kh_ens_val(eindex->positions, hash_pos); } return bitmap_pos + bitmap_num_objects(bitmap_git); @@ -2361,7 +2361,7 @@ int test_bitmap_commits(struct repository *r) die(_("failed to load bitmap indexes")); } - kh_foreach(bitmap_git->bitmaps, oid, value, { + kh_ens_foreach(bitmap_git->bitmaps, oid, value, { printf_ln("%s", oid_to_hex(&oid)); }); @@ -2479,7 +2479,7 @@ void free_bitmap_index(struct bitmap_index *b) ewah_pool_free(b->tags); if (b->bitmaps) { struct stored_bitmap *sb; - kh_foreach_value(b->bitmaps, sb, { + kh_ens_foreach_value(b->bitmaps, sb, { ewah_pool_free(sb->root); free(sb); });
The ->bits field of regular khashl structs is invalid when the ->keys array is NULL. Thus the ensemble *_getp implementation must follow existing *_get and *_getp usage conventions and check the iterator against kh_end(). This fixes a fast-import crash on t3427-rebase-subtree.sh in an abandoned commit to use the ensemble implementation for oid_map and oid_pos. I've abandoned the aforementioned commit for now since it was more intrusive, more expensive for small tables, and realloc(3) on glibc is already optimized using mremap(2) for large hash resizes. Signed-off-by: Eric Wong <e@80x24.org> --- khashl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/khashl.h b/khashl.h index 3660fd2ce4..8ffe80fbb2 100644 --- a/khashl.h +++ b/khashl.h @@ -265,7 +265,7 @@ typedef struct { low = hash & ((1U<<g->bits) - 1); \ h = &g->sub[low]; \ ret = prefix##_sub_getp_core(h, key, hash); \ - if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \ + if (ret >= kh_end(h)) r.sub = low, r.pos = (khint_t)-1; \ else r.sub = low, r.pos = ret; \ return r; \ } \
In order to ease a potential migration from khash to khashl, use the kh_size() macro instead of accessing the .size field directly. Signed-off-by: Eric Wong <e@80x24.org> --- list-objects-filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/list-objects-filter.c b/list-objects-filter.c index 4346f8da45..440f112d23 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -704,7 +704,7 @@ static void filter_combine__free(void *filter_data) for (sub = 0; sub < d->nr; sub++) { list_objects_filter__free(d->sub[sub].filter); oidset_clear(&d->sub[sub].seen); - if (d->sub[sub].omits.set.size) + if (kh_size(&d->sub[sub].omits.set)) BUG("expected oidset to be cleared already"); } free(d->sub);
The memory improvement is minor, but any memory reduction at all is welcome at this point. Fortunately, this set of changes is unintrusive. I have some other ideas that I'll hopefully get to implement before swapping kills all my SSDs (see bottom). Eric Wong (3): list-objects-filter: use kh_size API treewide: switch to khashl for memory savings khashl: fix ensemble lookups on empty table builtin/fast-import.c | 2 +- builtin/fsmonitor--daemon.c | 4 +- delta-islands.c | 4 +- khash.h | 338 ----------------------- khashl.h | 522 ++++++++++++++++++++++++++++++++++++ list-objects-filter.c | 2 +- object-store-ll.h | 2 +- object-store.h | 7 +- oidset.h | 2 +- pack-bitmap.h | 2 +- 10 files changed, 535 insertions(+), 350 deletions(-) delete mode 100644 khash.h create mode 100644 khashl.h TODO (some ideas stolen from khashl): * obj_hash can probably use a 0.75 load factor (instead of 0.5) to save memory and not slow down too much. oid_* khash has always had 0.77 which was fine and now khashl has 0.75. 0.75 is cheaper to compute via (shifts + ORs) than 0.77. * obj_hash can use `i = (i + 1) & mask' like khashl does to avoid branches in linear probe loops * obj_hash can use realloc and copy in-place resize logic khashl does. khashl uses the ->used bitmap for this, but it should be possible to do for obj_hash w/o additional allocations via pointer tagging (not khashl inspired): * keep an identity pool of OIDs separately (similar to how Perl5 pools all hash keys) and only use tagged pointers for OIDs. Pointer tagging can be used to distinguish between 7 hash functions, leaving us room for 5 more in addition to SHA-(1|256). This change will be a large effort (with a hopefully large savings). If we ever need more than 7 hash functions, we can switch to storing hash type information in extents that can be looked up using address ranges (AFAIK, jemalloc does this).
--- object.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/object.c b/object.c index 4219da9e3b..0224712a1c 100644 --- a/object.c +++ b/object.c @@ -74,12 +74,10 @@ static unsigned int hash_obj(const struct object_id *oid, unsigned int n) static void insert_obj_hash(struct object *obj, struct object **hash, unsigned int size) { unsigned int j = hash_obj(&obj->oid, size); + unsigned int mask = size - 1; - while (hash[j]) { - j++; - if (j >= size) - j = 0; - } + while (hash[j]) + j = (j + 1) & mask; hash[j] = obj; } @@ -89,19 +87,18 @@ static void insert_obj_hash(struct object *obj, struct object **hash, unsigned i */ struct object *lookup_object(struct repository *r, const struct object_id *oid) { - unsigned int i, first; + unsigned int i, first, mask; struct object *obj; if (!r->parsed_objects->obj_hash) return NULL; first = i = hash_obj(oid, r->parsed_objects->obj_hash_size); + mask = r->parsed_objects->obj_hash_size - 1; while ((obj = r->parsed_objects->obj_hash[i]) != NULL) { if (oideq(oid, &obj->oid)) break; - i++; - if (i == r->parsed_objects->obj_hash_size) - i = 0; + i = (i + 1) & mask; } if (obj && i != first) { /*
Using an ensemble of hash tables to implement a larger one reduces the space required to resize a hash table and lowers overhead. Instead of doubling the size of the entire table, only a sub-hash is doubled in size when growing. --- builtin/fast-import.c | 10 ++++---- builtin/replay.c | 10 ++++---- delta-islands.c | 54 +++++++++++++++++++++---------------------- fsck.c | 10 ++++---- khashl.h | 4 ++-- pack-bitmap-write.c | 4 ++-- pack-bitmap.c | 32 ++++++++++++------------- 7 files changed, 62 insertions(+), 62 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 29e50fd675..190e136e2e 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2198,7 +2198,7 @@ static uintmax_t change_note_fanout(struct tree_entry *root, static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const char **end) { int algo; - khiter_t it; + kh_ensitr_t it; /* Make SHA-1 object IDs have all-zero padding. */ memset(oid->hash, 0, sizeof(oid->hash)); @@ -2209,13 +2209,13 @@ static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const ch it = kh_get_oid_map(sub_oid_map, *oid); /* No such object? */ - if (it == kh_end(sub_oid_map)) { + if (kh_ens_is_end(it)) { /* If we're using the same algorithm, pass it through. */ if (hash_algos[algo].format_id == the_hash_algo->format_id) return 0; return -1; } - oidcpy(oid, kh_value(sub_oid_map, it)); + oidcpy(oid, kh_ens_val(sub_oid_map, it)); return 0; } @@ -3083,13 +3083,13 @@ static void insert_mapped_mark(uintmax_t mark, void *object, void *cbp) struct object_id *fromoid = object; struct object_id *tooid = find_mark(cbp, mark); int ret; - khiter_t it; + kh_ensitr_t it; it = kh_put_oid_map(sub_oid_map, *fromoid, &ret); /* We've already seen this object. 
*/ if (ret == 0) return; - kh_value(sub_oid_map, it) = tooid; + kh_ens_val(sub_oid_map, it) = tooid; } static void build_mark_map_one(struct mark_set *from, struct mark_set *to) diff --git a/builtin/replay.c b/builtin/replay.c index 6bc4b47f09..e084da8a94 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -227,10 +227,10 @@ static struct commit *mapped_commit(kh_oid_map_t *replayed_commits, struct commit *commit, struct commit *fallback) { - khint_t pos = kh_get_oid_map(replayed_commits, commit->object.oid); - if (pos == kh_end(replayed_commits)) + kh_ensitr_t pos = kh_get_oid_map(replayed_commits, commit->object.oid); + if (kh_ens_is_end(pos)) return fallback; - return kh_value(replayed_commits, pos); + return kh_ens_val(replayed_commits, pos); } static struct commit *pick_regular_commit(struct commit *pickme, @@ -381,7 +381,7 @@ int cmd_replay(int argc, const char **argv, const char *prefix) replayed_commits = kh_init_oid_map(); while ((commit = get_revision(&revs))) { const struct name_decoration *decoration; - khint_t pos; + kh_ensitr_t pos; int hr; if (!commit->parents) @@ -399,7 +399,7 @@ int cmd_replay(int argc, const char **argv, const char *prefix) if (hr == 0) BUG("Duplicate rewritten commit: %s\n", oid_to_hex(&commit->object.oid)); - kh_value(replayed_commits, pos) = last_commit; + kh_ens_val(replayed_commits, pos) = last_commit; /* Update any necessary branches */ if (advance_name) diff --git a/delta-islands.c b/delta-islands.c index aa35839f15..de159e98a8 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -90,7 +90,7 @@ static int island_bitmap_get(struct island_bitmap *self, uint32_t i) int in_same_island(const struct object_id *trg_oid, const struct object_id *src_oid) { - khiter_t trg_pos, src_pos; + kh_ensitr_t trg_pos, src_pos; /* If we aren't using islands, assume everything goes together. 
*/ if (!island_marks) @@ -101,7 +101,7 @@ int in_same_island(const struct object_id *trg_oid, const struct object_id *src_ * against anything -- it's not an important object */ trg_pos = kh_get_oid_map(island_marks, *trg_oid); - if (trg_pos >= kh_end(island_marks)) + if (kh_ens_is_end(trg_pos)) return 1; /* @@ -109,28 +109,28 @@ int in_same_island(const struct object_id *trg_oid, const struct object_id *src_ * we don't want to base any deltas on it! */ src_pos = kh_get_oid_map(island_marks, *src_oid); - if (src_pos >= kh_end(island_marks)) + if (kh_ens_is_end(src_pos)) return 0; - return island_bitmap_is_subset(kh_value(island_marks, trg_pos), - kh_value(island_marks, src_pos)); + return island_bitmap_is_subset(kh_ens_val(island_marks, trg_pos), + kh_ens_val(island_marks, src_pos)); } int island_delta_cmp(const struct object_id *a, const struct object_id *b) { - khiter_t a_pos, b_pos; + kh_ensitr_t a_pos, b_pos; struct island_bitmap *a_bitmap = NULL, *b_bitmap = NULL; if (!island_marks) return 0; a_pos = kh_get_oid_map(island_marks, *a); - if (a_pos < kh_end(island_marks)) - a_bitmap = kh_value(island_marks, a_pos); + if (!kh_ens_is_end(a_pos)) + a_bitmap = kh_ens_val(island_marks, a_pos); b_pos = kh_get_oid_map(island_marks, *b); - if (b_pos < kh_end(island_marks)) - b_bitmap = kh_value(island_marks, b_pos); + if (!kh_ens_is_end(b_pos)) + b_bitmap = kh_ens_val(island_marks, b_pos); if (a_bitmap) { if (!b_bitmap || !island_bitmap_is_subset(a_bitmap, b_bitmap)) @@ -146,20 +146,20 @@ int island_delta_cmp(const struct object_id *a, const struct object_id *b) static struct island_bitmap *create_or_get_island_marks(struct object *obj) { - khiter_t pos; + kh_ensitr_t pos; int hash_ret; pos = kh_put_oid_map(island_marks, obj->oid, &hash_ret); if (hash_ret) - kh_value(island_marks, pos) = island_bitmap_new(NULL); + kh_ens_val(island_marks, pos) = island_bitmap_new(NULL); - return kh_value(island_marks, pos); + return kh_ens_val(island_marks, pos); } static void 
set_island_marks(struct object *obj, struct island_bitmap *marks) { struct island_bitmap *b; - khiter_t pos; + kh_ensitr_t pos; int hash_ret; pos = kh_put_oid_map(island_marks, obj->oid, &hash_ret); @@ -169,7 +169,7 @@ static void set_island_marks(struct object *obj, struct island_bitmap *marks) * parent. */ marks->refcount++; - kh_value(island_marks, pos) = marks; + kh_ens_val(island_marks, pos) = marks; return; } @@ -177,10 +177,10 @@ static void set_island_marks(struct object *obj, struct island_bitmap *marks) * We do have it. Make sure we split any copy-on-write before * updating. */ - b = kh_value(island_marks, pos); + b = kh_ens_val(island_marks, pos); if (b->refcount > 1) { b->refcount--; - b = kh_value(island_marks, pos) = island_bitmap_new(b); + b = kh_ens_val(island_marks, pos) = island_bitmap_new(b); } island_bitmap_or(b, marks); } @@ -272,13 +272,13 @@ void resolve_tree_islands(struct repository *r, struct tree *tree; struct tree_desc desc; struct name_entry entry; - khiter_t pos; + kh_ensitr_t pos; pos = kh_get_oid_map(island_marks, ent->idx.oid); - if (pos >= kh_end(island_marks)) + if (kh_ens_is_end(pos)) continue; - root_marks = kh_value(island_marks, pos); + root_marks = kh_ens_val(island_marks, pos); tree = lookup_tree(r, &ent->idx.oid); if (!tree || parse_tree(tree) < 0) @@ -499,11 +499,11 @@ void load_delta_islands(struct repository *r, int progress) void propagate_island_marks(struct commit *commit) { - khiter_t pos = kh_get_oid_map(island_marks, commit->object.oid); + kh_ensitr_t pos = kh_get_oid_map(island_marks, commit->object.oid); - if (pos < kh_end(island_marks)) { + if (!kh_ens_is_end(pos)) { struct commit_list *p; - struct island_bitmap *root_marks = kh_value(island_marks, pos); + struct island_bitmap *root_marks = kh_ens_val(island_marks, pos); repo_parse_commit(the_repository, commit); set_island_marks(&repo_get_commit_tree(the_repository, commit)->object, @@ -518,7 +518,7 @@ void free_island_marks(void) struct island_bitmap *bitmap; 
if (island_marks) { - kh_foreach_value(island_marks, bitmap, { + kh_ens_foreach_value(island_marks, bitmap, { if (!--bitmap->refcount) free(bitmap); }); @@ -538,12 +538,12 @@ int compute_pack_layers(struct packing_data *to_pack) for (i = 0; i < to_pack->nr_objects; ++i) { struct object_entry *entry = &to_pack->objects[i]; - khiter_t pos = kh_get_oid_map(island_marks, entry->idx.oid); + kh_ensitr_t pos = kh_get_oid_map(island_marks, entry->idx.oid); oe_set_layer(to_pack, entry, 1); - if (pos < kh_end(island_marks)) { - struct island_bitmap *bitmap = kh_value(island_marks, pos); + if (!kh_ens_is_end(pos)) { + struct island_bitmap *bitmap = kh_ens_val(island_marks, pos); if (island_bitmap_get(bitmap, island_counter_core)) oe_set_layer(to_pack, entry, 0); diff --git a/fsck.c b/fsck.c index 8ded0a473a..4c67e1e64c 100644 --- a/fsck.c +++ b/fsck.c @@ -266,13 +266,13 @@ void fsck_enable_object_names(struct fsck_options *options) const char *fsck_get_object_name(struct fsck_options *options, const struct object_id *oid) { - khiter_t pos; + kh_ensitr_t pos; if (!options->object_names) return NULL; pos = kh_get_oid_map(options->object_names, *oid); - if (pos >= kh_end(options->object_names)) + if (kh_ens_is_end(pos)) return NULL; - return kh_value(options->object_names, pos); + return kh_ens_val(options->object_names, pos); } void fsck_put_object_name(struct fsck_options *options, @@ -281,7 +281,7 @@ void fsck_put_object_name(struct fsck_options *options, { va_list ap; struct strbuf buf = STRBUF_INIT; - khiter_t pos; + kh_ensitr_t pos; int hashret; if (!options->object_names) @@ -292,7 +292,7 @@ void fsck_put_object_name(struct fsck_options *options, return; va_start(ap, fmt); strbuf_vaddf(&buf, fmt, ap); - kh_value(options->object_names, pos) = strbuf_detach(&buf, NULL); + kh_ens_val(options->object_names, pos) = strbuf_detach(&buf, NULL); va_end(ap); } diff --git a/khashl.h b/khashl.h index a1eca0f3fd..fc905e280f 100644 --- a/khashl.h +++ b/khashl.h @@ -245,7 +245,7 @@ 
typedef struct { khint64_t count:54, bits:8; \ HType##_sub *sub; \ } HType; \ - SCOPE HType *prefix##_init_sub(HType *g, size_t bits) { \ + SCOPE HType *prefix##_init_bits(HType *g, size_t bits) { \ g->bits = bits; \ g->sub = (HType##_sub*)kcalloc(1U<<bits, sizeof(*g->sub)); \ return g; \ @@ -253,7 +253,7 @@ typedef struct { SCOPE HType *prefix##_init(void) { \ HType *g; \ g = (HType*)kcalloc(1, sizeof(*g)); \ - return prefix##_init_sub(g, 0); /* unsure about default */ \ + return prefix##_init_bits(g, 6); /* unsure about default */ \ } \ SCOPE void prefix##_release(HType *g) { \ int t; \ diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 990a9498d7..bbf2090c46 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -465,7 +465,7 @@ static int fill_bitmap_commit(struct bb_commit *ent, static void store_selected(struct bb_commit *ent, struct commit *commit) { struct bitmapped_commit *stored = &writer.selected[ent->idx]; - khiter_t hash_pos; + kh_ensitr_t hash_pos; int hash_ret; stored->bitmap = bitmap_to_ewah(ent->bitmap); @@ -474,7 +474,7 @@ static void store_selected(struct bb_commit *ent, struct commit *commit) if (hash_ret == 0) die("Duplicate entry when writing index: %s", oid_to_hex(&commit->object.oid)); - kh_value(writer.bitmaps, hash_pos) = stored; + kh_ens_val(writer.bitmaps, hash_pos) = stored; } int bitmap_writer_build(struct packing_data *to_pack) diff --git a/pack-bitmap.c b/pack-bitmap.c index 2baeabacee..68cd893dee 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -214,7 +214,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, int flags) { struct stored_bitmap *stored; - khiter_t hash_pos; + kh_ensitr_t hash_pos; int ret; stored = xmalloc(sizeof(struct stored_bitmap)); @@ -235,7 +235,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, return NULL; } - kh_value(index->bitmaps, hash_pos) = stored; + kh_ens_val(index->bitmaps, hash_pos) = stored; return stored; } @@ -721,7 +721,7 @@ static 
struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_ static size_t xor_items_nr = 0, xor_items_alloc = 0; static int is_corrupt = 0; int xor_flags; - khiter_t hash_pos; + kh_ensitr_t hash_pos; struct bitmap_lookup_table_xor_item *xor_item; if (is_corrupt) @@ -766,8 +766,8 @@ static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_ * has already been stored. So, assign this stored bitmap * to the xor_bitmap. */ - if (hash_pos < kh_end(bitmap_git->bitmaps) && - (xor_bitmap = kh_value(bitmap_git->bitmaps, hash_pos))) + if (!kh_ens_is_end(hash_pos) && + (xor_bitmap = kh_ens_val(bitmap_git->bitmaps, hash_pos))) break; xor_items_nr++; xor_row = triplet.xor_row; @@ -841,9 +841,9 @@ static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_ struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, struct commit *commit) { - khiter_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps, + kh_ensitr_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps, commit->object.oid); - if (hash_pos >= kh_end(bitmap_git->bitmaps)) { + if (kh_ens_is_end(hash_pos)) { struct stored_bitmap *bitmap = NULL; if (!bitmap_git->table_lookup) return NULL; @@ -855,17 +855,17 @@ struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, return NULL; return lookup_stored_bitmap(bitmap); } - return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos)); + return lookup_stored_bitmap(kh_ens_val(bitmap_git->bitmaps, hash_pos)); } static inline int bitmap_position_extended(struct bitmap_index *bitmap_git, const struct object_id *oid) { kh_oid_pos_t *positions = bitmap_git->ext_index.positions; - khiter_t pos = kh_get_oid_pos(positions, *oid); + kh_ensitr_t pos = kh_get_oid_pos(positions, *oid); - if (pos < kh_end(positions)) { - int bitmap_pos = kh_value(positions, pos); + if (!kh_ens_is_end(pos)) { + int bitmap_pos = kh_ens_val(positions, pos); return bitmap_pos + bitmap_num_objects(bitmap_git); } @@ -913,7 +913,7 @@ 
static int ext_index_add_object(struct bitmap_index *bitmap_git, { struct eindex *eindex = &bitmap_git->ext_index; - khiter_t hash_pos; + kh_ensitr_t hash_pos; int hash_ret; int bitmap_pos; @@ -928,10 +928,10 @@ static int ext_index_add_object(struct bitmap_index *bitmap_git, bitmap_pos = eindex->count; eindex->objects[eindex->count] = object; eindex->hashes[eindex->count] = pack_name_hash(name); - kh_value(eindex->positions, hash_pos) = bitmap_pos; + kh_ens_val(eindex->positions, hash_pos) = bitmap_pos; eindex->count++; } else { - bitmap_pos = kh_value(eindex->positions, hash_pos); + bitmap_pos = kh_ens_val(eindex->positions, hash_pos); } return bitmap_pos + bitmap_num_objects(bitmap_git); @@ -2361,7 +2361,7 @@ int test_bitmap_commits(struct repository *r) die(_("failed to load bitmap indexes")); } - kh_foreach(bitmap_git->bitmaps, oid, value, { + kh_ens_foreach(bitmap_git->bitmaps, oid, value, { printf_ln("%s", oid_to_hex(&oid)); }); @@ -2479,7 +2479,7 @@ void free_bitmap_index(struct bitmap_index *b) ewah_pool_free(b->tags); if (b->bitmaps) { struct stored_bitmap *sb; - kh_foreach_value(b->bitmaps, sb, { + kh_ens_foreach_value(b->bitmaps, sb, { ewah_pool_free(sb->root); free(sb); });
The ->bits field of regular khashl structs is invalid when the ->keys array is NULL. Thus the ensemble *_getp implementation must follow existing *_get and *_getp usage convensions and check the iterator against kh_end(). This fixes a fast-import crash on t3427-rebase-subtree.sh in a subsequent commit to use ensemble implementation for oid_map and oid_pos. --- khashl.h | 62 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/khashl.h b/khashl.h index 8b222a2350..a1eca0f3fd 100644 --- a/khashl.h +++ b/khashl.h @@ -204,22 +204,22 @@ static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 26 __KHASHL_PROTOTYPES(HType, prefix, khkey_t) /* compatibility wrappers to make khash -> khashl migration easier */ -#define __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) \ +#define __KHASH_COMPAT(SCOPE, kh_idx, HType, prefix, khkey_t) \ typedef HType HType##_t; \ SCOPE HType *kh_init_##prefix(void) { return prefix##_init(); } \ SCOPE void kh_release_##prefix(HType *h) { prefix##_release(h); } \ SCOPE void kh_destroy_##prefix(HType *h) { prefix##_destroy(h); } \ SCOPE void kh_clear_##prefix(HType *h) { prefix##_clear(h); } \ - SCOPE khint_t kh_get_##prefix(const HType *h, khkey_t key) { \ + SCOPE kh_idx kh_get_##prefix(const HType *h, khkey_t key) { \ return prefix##_get(h, key); \ } \ SCOPE void kh_resize_##prefix(HType *h, khint_t new_n_buckets) { \ prefix##_resize(h, new_n_buckets); \ } \ - SCOPE khint_t kh_put_##prefix(HType *h, khkey_t key, int *absent) { \ + SCOPE kh_idx kh_put_##prefix(HType *h, khkey_t key, int *absent) { \ return prefix##_put(h, key, absent); \ } \ - SCOPE int kh_del_##prefix(HType *h, khint_t i) { \ + SCOPE int kh_del_##prefix(HType *h, kh_idx i) { \ return prefix##_del(h, i); \ } @@ -245,18 +245,32 @@ typedef struct { khint64_t count:54, bits:8; \ HType##_sub *sub; \ } HType; \ - SCOPE HType *prefix##_init(int bits) { \ - HType *g; \ - g = (HType*)kcalloc(1, 
sizeof(*g)); \ + SCOPE HType *prefix##_init_sub(HType *g, size_t bits) { \ g->bits = bits; \ g->sub = (HType##_sub*)kcalloc(1U<<bits, sizeof(*g->sub)); \ return g; \ } \ + SCOPE HType *prefix##_init(void) { \ + HType *g; \ + g = (HType*)kcalloc(1, sizeof(*g)); \ + return prefix##_init_sub(g, 0); /* unsure about default */ \ + } \ + SCOPE void prefix##_release(HType *g) { \ + int t; \ + for (t = 0; t < 1<<g->bits; ++t) \ + prefix##_sub_release(&g->sub[t]); \ + kfree(g->sub); \ + } \ SCOPE void prefix##_destroy(HType *g) { \ + if (!g) return; \ + prefix##_release(g); \ + kfree(g); \ + } \ + SCOPE void prefix##_clear(HType *g) { \ int t; \ if (!g) return; \ - for (t = 0; t < 1<<g->bits; ++t) { kfree((void*)g->sub[t].keys); kfree(g->sub[t].used); } \ - kfree(g->sub); kfree(g); \ + for (t = 0; t < 1<<g->bits; ++t) \ + prefix##_sub_clear(&g->sub[t]); \ } \ SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \ khint_t hash, low, ret; \ @@ -266,7 +280,7 @@ typedef struct { low = hash & ((1U<<g->bits) - 1); \ h = &g->sub[low]; \ ret = prefix##_sub_getp_core(h, key, hash); \ - if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \ + if (ret >= kh_end(h)) r.sub = low, r.pos = (khint_t)-1; \ else r.sub = low, r.pos = ret; \ return r; \ } \ @@ -313,7 +327,7 @@ typedef struct { SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } \ - __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + __KHASH_COMPAT(SCOPE, khint_t, HType, prefix, khkey_t) #define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ @@ -328,7 +342,7 @@ typedef struct { SCOPE khint_t 
prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \ - __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + __KHASH_COMPAT(SCOPE, khint_t, HType, prefix, khkey_t) #define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \ @@ -355,11 +369,15 @@ typedef struct { static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ KHASHE_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ - SCOPE HType *prefix##_init(int bits) { return prefix##_m_init(bits); } \ + SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ + SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \ SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t ignore) { /* noop */ } \ SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_m_del(h, k); } \ - SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } + SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \ + __KHASH_COMPAT(SCOPE, kh_ensitr_t, HType, prefix, khkey_t) /************************** * Public macro functions * @@ -488,6 
+506,18 @@ static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) { code; \ } } +#define kh_ens_foreach(g, kvar, vvar, code) do { \ + size_t t; \ + for (t = 0; t < 1<<g->bits; ++t) \ + kh_foreach(&g->sub[t], kvar, vvar, code); \ +} while (0) + +#define kh_ens_foreach_value(g, vvar, code) do { \ + size_t t; \ + for (t = 0; t < 1<<g->bits; ++t) \ + kh_foreach_value(&g->sub[t], vvar, code); \ +} while (0) + /*! @function @abstract Iterate over the values in the hash table @param h Pointer to the hash table @@ -514,10 +544,10 @@ static inline int oideq_by_value(struct object_id a, struct object_id b) KHASHL_SET_INIT(KH_LOCAL, kh_oid_set, oid_set, struct object_id, oidhash_by_value, oideq_by_value) -KHASHL_MAP_INIT(KH_LOCAL, kh_oid_map, oid_map, struct object_id, void *, +KHASHE_MAP_INIT(KH_LOCAL, kh_oid_map, oid_map, struct object_id, void *, oidhash_by_value, oideq_by_value) -KHASHL_MAP_INIT(KH_LOCAL, kh_oid_pos, oid_pos, struct object_id, int, +KHASHE_MAP_INIT(KH_LOCAL, kh_oid_pos, oid_pos, struct object_id, int, oidhash_by_value, oideq_by_value) #endif /* __AC_KHASHL_H */
khashl is an updated version of khash with significantly less memory overhead than the original khash and similar overall performance. Deletions may be slightly slower, but the majority of our hash tables do not delete individual elements. This patch was made with two additional goals to ease review: 1) minimize changes outside of khash*.h files 2) minimize and document all differences from upstream[1] khashl.h khashl.h differences from upstream: * favor portability constructs from our codebase: MAYBE_UNUSED over klib_unused, inline over kh_inline, and various integer types * disable packed attribute to satisfy -Werror=address-of-packed-member, AFAIK it doesn't change any of our data structures * Port the following commits over from our old khash.h: 9249ca26aca3 (khash: factor out kh_release_*, 2018-10-04) 2756ca4347cb (use REALLOC_ARRAY for changing the allocation size of arrays, 2014-09-16) 5632e838f8fa (khash: clarify that allocations never fail, 2021-07-03) * use our memory allocation wrappers * provide wrappers for compatibility with existing callers using the khash API. The khashl function naming convention is: ${NOUN}_${VERB} while the khash convention is: kh_${VERB}_${NOUN}. The kh_${NAME}_t typedef and naming convention are preserved via __KHASH_COMPAT macro to ease review (despite the `_t' suffix being reserved and typedefs being discouraged in the Linux kernel). 
* copy relevant API docs over from khash.h for identically named macros * preserve kh_begin, kh_foreach, kh_foreach_value from khash.h since khashl.h doesn't provide them * flesh out KHASHL_{SET,MAP}_INIT wrappers with *_clear, *_resize, and *_release functions [1] git clone https://github.com/attractivechaos/klib.git 2895a16cb55e (support an ensemble of hash tables, 2023-12-18) khashl.h API differences from khash.h which affected this change: * KHASHL_MAP_INIT and KHASHL_SET_INIT macros replace KHASH_INIT * user-supplied hash and equality functions use different names * object-store-ll.h avoided the kh_*_t convention (since I dislike typedef) and was the only place where I had to change a definition. khashl also offers a convenient hash table ensemble API which may be of use for us down the line for gigantic things. Signed-off-by: Eric Wong <e@80x24.org> --- builtin/fast-import.c | 2 +- builtin/fsmonitor--daemon.c | 4 +- delta-islands.c | 4 +- khash.h | 338 ----------------------- khashl.h | 523 ++++++++++++++++++++++++++++++++++++ object-store-ll.h | 2 +- object-store.h | 7 +- oidset.h | 2 +- pack-bitmap.h | 2 +- 9 files changed, 535 insertions(+), 349 deletions(-) delete mode 100644 khash.h create mode 100644 khashl.h diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 71a195ca22..29e50fd675 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -24,7 +24,7 @@ #include "object-store-ll.h" #include "mem-pool.h" #include "commit-reach.h" -#include "khash.h" +#include "khashl.h" #include "date.h" #define PACK_ID_BITS 16 diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 1593713f4c..1c71d96c6d 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -13,7 +13,7 @@ #include "fsmonitor--daemon.h" #include "repository.h" #include "simple-ipc.h" -#include "khash.h" +#include "khashl.h" #include "run-command.h" #include "trace.h" #include "trace2.h" @@ -650,7 +650,7 @@ static int 
fsmonitor_parse_client_token(const char *buf_token, return 0; } -KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal) +KHASHL_SET_INIT(KH_LOCAL, kh_str, str, const char *, kh_hash_str, kh_eq_str) static int do_handle_client(struct fsmonitor_daemon_state *state, const char *command, diff --git a/delta-islands.c b/delta-islands.c index ee2318d45a..aa35839f15 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -10,14 +10,14 @@ #include "diff.h" #include "progress.h" #include "refs.h" -#include "khash.h" +#include "khashl.h" #include "pack-bitmap.h" #include "pack-objects.h" #include "delta-islands.h" #include "oid-array.h" #include "config.h" -KHASH_INIT(str, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal) +KHASHL_MAP_INIT(KH_LOCAL, kh_str, str, const char *, void *, kh_hash_str, kh_eq_str) static kh_oid_map_t *island_marks; static unsigned island_counter; diff --git a/khash.h b/khash.h deleted file mode 100644 index ff88163177..0000000000 --- a/khash.h +++ /dev/null @@ -1,338 +0,0 @@ -/* The MIT License - - Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk> - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef __AC_KHASH_H -#define __AC_KHASH_H - -#include "hash.h" - -#define AC_VERSION_KHASH_H "0.2.8" - -typedef uint32_t khint32_t; -typedef uint64_t khint64_t; - -typedef khint32_t khint_t; -typedef khint_t khiter_t; - -#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) -#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) -#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) -#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) -#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) -#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) -#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) - -#define __ac_fsize(m) ((m) < 16? 
1 : (m)>>4) - -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) - -static inline khint_t __ac_X31_hash_string(const char *s) -{ - khint_t h = (khint_t)*s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; - return h; -} - -#define kh_str_hash_func(key) __ac_X31_hash_string(key) -#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) - -static const double __ac_HASH_UPPER = 0.77; - -#define __KHASH_TYPE(name, khkey_t, khval_t) \ - typedef struct kh_##name { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ - khkey_t *keys; \ - khval_t *vals; \ - } kh_##name##_t; - -#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ - kh_##name##_t *kh_init_##name(void); \ - void kh_destroy_##name(kh_##name##_t *h); \ - void kh_clear_##name(kh_##name##_t *h); \ - khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ - void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ - khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ - void kh_del_##name(kh_##name##_t *h, khint_t x); - -#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - SCOPE kh_##name##_t *kh_init_##name(void) { \ - return (kh_##name##_t*)xcalloc(1, sizeof(kh_##name##_t)); \ - } \ - SCOPE void kh_release_##name(kh_##name##_t *h) \ - { \ - free(h->flags); \ - free((void *)h->keys); \ - free((void *)h->vals); \ - } \ - SCOPE void kh_destroy_##name(kh_##name##_t *h) \ - { \ - if (h) { \ - kh_release_##name(h); \ - free(h); \ - } \ - } \ - SCOPE void kh_clear_##name(kh_##name##_t *h) \ - { \ - if (h && h->flags) { \ - memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ - h->size = h->n_occupied = 0; \ - } \ - } \ - SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ - { \ - if (h->n_buckets) { \ - khint_t k, i, last, mask, step = 0; \ - mask = h->n_buckets - 1; \ - k = __hash_func(key); i = k & mask; \ - last = i; \ - while 
(!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - i = (i + (++step)) & mask; \ - if (i == last) return h->n_buckets; \ - } \ - return __ac_iseither(h->flags, i)? h->n_buckets : i; \ - } else return 0; \ - } \ - SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ - { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ - khint32_t *new_flags = NULL; \ - khint_t j = 1; \ - { \ - kroundup32(new_n_buckets); \ - if (new_n_buckets < 4) new_n_buckets = 4; \ - if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ - else { /* hash table size to be changed (shrink or expand); rehash */ \ - ALLOC_ARRAY(new_flags, __ac_fsize(new_n_buckets)); \ - memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (h->n_buckets < new_n_buckets) { /* expand */ \ - REALLOC_ARRAY(h->keys, new_n_buckets); \ - if (kh_is_map) { \ - REALLOC_ARRAY(h->vals, new_n_buckets); \ - } \ - } /* otherwise shrink */ \ - } \ - } \ - if (j) { /* rehashing is needed */ \ - for (j = 0; j != h->n_buckets; ++j) { \ - if (__ac_iseither(h->flags, j) == 0) { \ - khkey_t key = h->keys[j]; \ - khval_t val; \ - khint_t new_mask; \ - new_mask = new_n_buckets - 1; \ - if (kh_is_map) val = h->vals[j]; \ - __ac_set_isdel_true(h->flags, j); \ - while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t k, i, step = 0; \ - k = __hash_func(key); \ - i = k & new_mask; \ - while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ - __ac_set_isempty_false(new_flags, i); \ - if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ - { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ - if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ - __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ - } else 
{ /* write the element and jump out of the loop */ \ - h->keys[i] = key; \ - if (kh_is_map) h->vals[i] = val; \ - break; \ - } \ - } \ - } \ - } \ - if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ - REALLOC_ARRAY(h->keys, new_n_buckets); \ - if (kh_is_map) REALLOC_ARRAY(h->vals, new_n_buckets); \ - } \ - free(h->flags); /* free the working space */ \ - h->flags = new_flags; \ - h->n_buckets = new_n_buckets; \ - h->n_occupied = h->size; \ - h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ - } \ - } \ - SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ - { \ - khint_t x; \ - if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ - if (h->n_buckets > (h->size<<1)) { \ - kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \ - } else { \ - kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ - } \ - } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ - { \ - khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ - x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ - if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ - else { \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - if (__ac_isdel(h->flags, i)) site = i; \ - i = (i + (++step)) & mask; \ - if (i == last) { x = site; break; } \ - } \ - if (x == h->n_buckets) { \ - if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ - else x = i; \ - } \ - } \ - } \ - if (__ac_isempty(h->flags, x)) { /* not present at all */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; ++h->n_occupied; \ - *ret = 1; \ - } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; \ - *ret = 2; \ - } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ - return x; \ - } \ - 
SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ - { \ - if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ - __ac_set_isdel_true(h->flags, x); \ - --h->size; \ - } \ - } - -#define KHASH_DECLARE(name, khkey_t, khval_t) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_PROTOTYPES(name, khkey_t, khval_t) - -#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) - -#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - KHASH_INIT2(name, MAYBE_UNUSED static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) - -/* Other convenient macros... */ - -/*! @function - @abstract Test whether a bucket contains data. - @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return 1 if containing data; 0 otherwise [int] - */ -#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) - -/*! @function - @abstract Get key given an iterator - @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return Key [type of keys] - */ -#define kh_key(h, x) ((h)->keys[x]) - -/*! @function - @abstract Get value given an iterator - @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return Value [type of values] - @discussion For hash sets, calling this results in segfault. - */ -#define kh_val(h, x) ((h)->vals[x]) - -/*! @function - @abstract Alias of kh_val() - */ -#define kh_value(h, x) ((h)->vals[x]) - -/*! @function - @abstract Get the start iterator - @param h Pointer to the hash table [khash_t(name)*] - @return The start iterator [khint_t] - */ -#define kh_begin(h) (khint_t)(0) - -/*! 
@function - @abstract Get the end iterator - @param h Pointer to the hash table [khash_t(name)*] - @return The end iterator [khint_t] - */ -#define kh_end(h) ((h)->n_buckets) - -/*! @function - @abstract Get the number of elements in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @return Number of elements in the hash table [khint_t] - */ -#define kh_size(h) ((h)->size) - -/*! @function - @abstract Get the number of buckets in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @return Number of buckets in the hash table [khint_t] - */ -#define kh_n_buckets(h) ((h)->n_buckets) - -/*! @function - @abstract Iterate over the entries in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @param kvar Variable to which key will be assigned - @param vvar Variable to which value will be assigned - @param code Block of code to execute - */ -#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (kvar) = kh_key(h,__i); \ - (vvar) = kh_val(h,__i); \ - code; \ - } } - -/*! 
@function - @abstract Iterate over the values in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @param vvar Variable to which value will be assigned - @param code Block of code to execute - */ -#define kh_foreach_value(h, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (vvar) = kh_val(h,__i); \ - code; \ - } } - -static inline unsigned int oidhash_by_value(struct object_id oid) -{ - return oidhash(&oid); -} - -static inline int oideq_by_value(struct object_id a, struct object_id b) -{ - return oideq(&a, &b); -} - -KHASH_INIT(oid_set, struct object_id, int, 0, oidhash_by_value, oideq_by_value) - -KHASH_INIT(oid_map, struct object_id, void *, 1, oidhash_by_value, oideq_by_value) - -KHASH_INIT(oid_pos, struct object_id, int, 1, oidhash_by_value, oideq_by_value) - -#endif /* __AC_KHASH_H */ diff --git a/khashl.h b/khashl.h new file mode 100644 index 0000000000..8b222a2350 --- /dev/null +++ b/khashl.h @@ -0,0 +1,523 @@ +/* The MIT License + + Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk> + Copyright (c) 2019-2023 by Attractive Chaos <attractor@live.co.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef __AC_KHASHL_H +#define __AC_KHASHL_H + +#include "hash.h" + +#define AC_VERSION_KHASHL_H "0.2" + +typedef uint32_t khint32_t; +typedef uint64_t khint64_t; + +typedef khint32_t khint_t; +typedef khint_t khiter_t; + +#define kh_inline inline /* portably handled elsewhere */ +#define KH_LOCAL static kh_inline MAYBE_UNUSED + +#ifndef kcalloc +#define kcalloc(N,Z) xcalloc(N,Z) +#endif +#ifndef kfree +#define kfree(P) free(P) +#endif + +/**************************** + * Simple private functions * + ****************************/ + +#define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U) +#define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU)) +#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU))) + +#define __kh_fsize(m) ((m) < 32? 
1 : (m)>>5) + +static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } + +/******************* + * Hash table base * + *******************/ + +#define __KHASHL_TYPE(HType, khkey_t) \ + typedef struct HType { \ + khint_t bits, count; \ + khint32_t *used; \ + khkey_t *keys; \ + } HType; + +#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ + extern HType *prefix##_init(void); \ + extern void prefix##_destroy(HType *h); \ + extern void prefix##_clear(HType *h); \ + extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \ + extern void prefix##_resize(HType *h, khint_t new_n_buckets); \ + extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \ + extern void prefix##_del(HType *h, khint_t k); + +#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + SCOPE HType *prefix##_init(void) { \ + return (HType*)kcalloc(1, sizeof(HType)); \ + } \ + SCOPE void prefix##_release(HType *h) { \ + kfree((void *)h->keys); kfree(h->used); \ + } \ + SCOPE void prefix##_destroy(HType *h) { \ + if (!h) return; \ + prefix##_release(h); \ + kfree(h); \ + } \ + SCOPE void prefix##_clear(HType *h) { \ + if (h && h->used) { \ + khint_t n_buckets = (khint_t)1U << h->bits; \ + memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ + h->count = 0; \ + } \ + } + +#define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_getp_core(const HType *h, const khkey_t *key, khint_t hash) { \ + khint_t i, last, n_buckets, mask; \ + if (h->keys == 0) return 0; \ + n_buckets = (khint_t)1U << h->bits; \ + mask = n_buckets - 1U; \ + i = last = __kh_h2b(hash, h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) return n_buckets; \ + } \ + return !__kh_used(h->used, i)? 
n_buckets : i; \ + } \ + SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { return prefix##_getp_core(h, key, __hash_fn(*key)); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp_core(h, &key, __hash_fn(key)); } + +#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { \ + khint32_t *new_used = 0; \ + khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ + while ((x >>= 1) != 0) ++j; \ + if (new_n_buckets & (new_n_buckets - 1)) ++j; \ + new_bits = j > 2? j : 2; \ + new_n_buckets = (khint_t)1U << new_bits; \ + if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return; /* noop, requested size is too small */ \ + new_used = (khint32_t*)kcalloc(__kh_fsize(new_n_buckets), sizeof(khint32_t)); \ + n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \ + if (n_buckets < new_n_buckets) { /* expand */ \ + REALLOC_ARRAY(h->keys, new_n_buckets); \ + } /* otherwise shrink */ \ + new_mask = new_n_buckets - 1; \ + for (j = 0; j != n_buckets; ++j) { \ + khkey_t key; \ + if (!__kh_used(h->used, j)) continue; \ + key = h->keys[j]; \ + __kh_set_unused(h->used, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t i; \ + i = __kh_h2b(__hash_fn(key), new_bits); \ + while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \ + __kh_set_used(new_used, i); \ + if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + break; \ + } \ + } \ + } \ + if (n_buckets > new_n_buckets) /* shrink the hash table */ \ + REALLOC_ARRAY(h->keys, new_n_buckets); \ + kfree(h->used); /* free the working space */ \ + h->used = new_used, h->bits = new_bits; \ + } + +#define 
__KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_putp_core(HType *h, const khkey_t *key, khint_t hash, int *absent) { \ + khint_t n_buckets, i, last, mask; \ + n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \ + *absent = -1; \ + if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \ + prefix##_resize(h, n_buckets + 1U); \ + n_buckets = (khint_t)1U<<h->bits; \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + mask = n_buckets - 1; \ + i = last = __kh_h2b(hash, h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) break; \ + } \ + if (!__kh_used(h->used, i)) { /* not present at all */ \ + h->keys[i] = *key; \ + __kh_set_used(h->used, i); \ + ++h->count; \ + *absent = 1; \ + } else *absent = 0; /* Don't touch h->keys[i] if present */ \ + return i; \ + } \ + SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { return prefix##_putp_core(h, key, __hash_fn(*key), absent); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp_core(h, &key, __hash_fn(key), absent); } + +#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ + SCOPE int prefix##_del(HType *h, khint_t i) { \ + khint_t j = i, k, mask, n_buckets; \ + if (h->keys == 0) return 0; \ + n_buckets = (khint_t)1U<<h->bits; \ + mask = n_buckets - 1U; \ + while (1) { \ + j = (j + 1U) & mask; \ + if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \ + k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ + if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ + h->keys[i] = h->keys[j], i = j; \ + } \ + __kh_set_unused(h->used, i); \ + --h->count; \ + return 1; \ + } + +#define KHASHL_DECLARE(HType, prefix, khkey_t) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_PROTOTYPES(HType, prefix, khkey_t) + +/* compatibility wrappers to 
make khash -> khashl migration easier */ +#define __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) \ + typedef HType HType##_t; \ + SCOPE HType *kh_init_##prefix(void) { return prefix##_init(); } \ + SCOPE void kh_release_##prefix(HType *h) { prefix##_release(h); } \ + SCOPE void kh_destroy_##prefix(HType *h) { prefix##_destroy(h); } \ + SCOPE void kh_clear_##prefix(HType *h) { prefix##_clear(h); } \ + SCOPE khint_t kh_get_##prefix(const HType *h, khkey_t key) { \ + return prefix##_get(h, key); \ + } \ + SCOPE void kh_resize_##prefix(HType *h, khint_t new_n_buckets) { \ + prefix##_resize(h, new_n_buckets); \ + } \ + SCOPE khint_t kh_put_##prefix(HType *h, khkey_t key, int *absent) { \ + return prefix##_put(h, key, absent); \ + } \ + SCOPE int kh_del_##prefix(HType *h, khint_t i) { \ + return prefix##_del(h, i); \ + } + +#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) + +/*************************** + * Ensemble of hash tables * + ***************************/ + +typedef struct { + khint_t sub, pos; +} kh_ensitr_t; + +#define KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + KHASHL_INIT(KH_LOCAL, HType##_sub, prefix##_sub, khkey_t, __hash_fn, __hash_eq) \ + typedef struct HType { \ + khint64_t count:54, bits:8; \ + HType##_sub *sub; \ + } HType; \ + SCOPE HType *prefix##_init(int bits) { \ + HType *g; \ + g = (HType*)kcalloc(1, sizeof(*g)); \ + g->bits = bits; \ + g->sub = (HType##_sub*)kcalloc(1U<<bits, sizeof(*g->sub)); \ + return g; \ + } \ + SCOPE void prefix##_destroy(HType *g) { \ + int t; \ + if (!g) return; \ + for (t = 0; t < 1<<g->bits; ++t) { 
kfree((void*)g->sub[t].keys); kfree(g->sub[t].used); } \ + kfree(g->sub); kfree(g); \ + } \ + SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \ + khint_t hash, low, ret; \ + kh_ensitr_t r; \ + HType##_sub *h; \ + hash = __hash_fn(*key); \ + low = hash & ((1U<<g->bits) - 1); \ + h = &g->sub[low]; \ + ret = prefix##_sub_getp_core(h, key, hash); \ + if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \ + else r.sub = low, r.pos = ret; \ + return r; \ + } \ + SCOPE kh_ensitr_t prefix##_get(const HType *g, const khkey_t key) { return prefix##_getp(g, &key); } \ + SCOPE kh_ensitr_t prefix##_putp(HType *g, const khkey_t *key, int *absent) { \ + khint_t hash, low, ret; \ + kh_ensitr_t r; \ + HType##_sub *h; \ + hash = __hash_fn(*key); \ + low = hash & ((1U<<g->bits) - 1); \ + h = &g->sub[low]; \ + ret = prefix##_sub_putp_core(h, key, hash, absent); \ + if (*absent) ++g->count; \ + if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \ + else r.sub = low, r.pos = ret; \ + return r; \ + } \ + SCOPE kh_ensitr_t prefix##_put(HType *g, const khkey_t key, int *absent) { return prefix##_putp(g, &key, absent); } \ + SCOPE int prefix##_del(HType *g, kh_ensitr_t itr) { \ + HType##_sub *h = &g->sub[itr.sub]; \ + int ret; \ + ret = prefix##_sub_del(h, itr.pos); \ + if (ret) --g->count; \ + return ret; \ + } + +/***************************** + * More convenient interface * + *****************************/ + +#define __kh_packed /* noop, we use -Werror=address-of-packed-member */ +#define __kh_cached_hash(x) ((x).hash) + +#define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \ + static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, 
prefix##_s_hash, prefix##_s_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \ + SCOPE void prefix##_release(HType *h) { prefix##_s_release(h); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \ + SCOPE void prefix##_clear(HType *h) { prefix##_s_clear(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } \ + __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + +#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ + SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_m_resize(h, new_n_buckets); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return 
prefix##_m_putp(h, &t, absent); } \ + __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + +#define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \ + static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); } + +#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \ + static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return 
prefix##_cm_putp(h, &t, absent); } + +#define KHASHE_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHE_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ + SCOPE HType *prefix##_init(int bits) { return prefix##_m_init(bits); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_m_del(h, k); } \ + SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } + +/************************** + * Public macro functions * + **************************/ + +#define kh_bucket(h, x) ((h)->keys[x]) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->count) + +#define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table + @return The end iterator [khint_t] + */ +#define kh_end(h) kh_capacity(h) + +/*! @function + @abstract Get key given an iterator + @param h Pointer to the hash table + @param x Iterator to the bucket [khint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x].key) + +/*! 
@function + @abstract Get value given an iterator + @param h Pointer to the hash table + @param x Iterator to the bucket [khint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->keys[x].val) + +/*! @function + @abstract Alias of kh_val() + */ +#define kh_value(h, x) kh_val(h, x) + +/*! @function + @abstract Test whether a bucket contains data. + @param h Pointer to the hash table + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) __kh_used((h)->used, (x)) + +#define kh_ens_key(g, x) kh_key(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_val(g, x) kh_val(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_exist(g, x) kh_exist(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_is_end(x) ((x).pos == (khint_t)-1) +#define kh_ens_size(g) ((g)->count) + +/************************************** + * Common hash and equality functions * + **************************************/ + +#define kh_eq_generic(a, b) ((a) == (b)) +#define kh_eq_str(a, b) (strcmp((a), (b)) == 0) +#define kh_hash_dummy(x) ((khint_t)(x)) + +static kh_inline khint_t kh_hash_uint32(khint_t key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} + +static kh_inline khint_t kh_hash_uint64(khint64_t key) { + key = ~key + (key << 21); + key = key ^ key >> 24; + key = (key + (key << 3)) + (key << 8); + key = key ^ key >> 14; + key = (key + (key << 2)) + (key << 4); + key = key ^ key >> 28; + key = key + (key << 31); + return (khint_t)key; +} + +#define KH_FNV_SEED 11 + +static kh_inline khint_t kh_hash_str(const char *s) { /* FNV1a */ + khint_t h = KH_FNV_SEED ^ 2166136261U; + const unsigned char *t = (const unsigned char*)s; + for (; *t; ++t) + h ^= *t, h *= 16777619; + return h; +} + +static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) { + khint_t h = 
KH_FNV_SEED ^ 2166136261U; + int i; + for (i = 0; i < len; ++i) + h ^= s[i], h *= 16777619; + return h; +} + +/*! @function + @abstract Get the start iterator + @param h Pointer to the hash table + @return The start iterator [khint_t] + */ +#define kh_begin(h) (khint_t)(0) + +/*! @function + @abstract Iterate over the entries in the hash table + @param h Pointer to the hash table + @param kvar Variable to which key will be assigned + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (kvar) = kh_key(h,__i); \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/*! @function + @abstract Iterate over the values in the hash table + @param h Pointer to the hash table + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach_value(h, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +static inline unsigned int oidhash_by_value(struct object_id oid) +{ + return oidhash(&oid); +} + +static inline int oideq_by_value(struct object_id a, struct object_id b) +{ + return oideq(&a, &b); +} + +KHASHL_SET_INIT(KH_LOCAL, kh_oid_set, oid_set, struct object_id, + oidhash_by_value, oideq_by_value) + +KHASHL_MAP_INIT(KH_LOCAL, kh_oid_map, oid_map, struct object_id, void *, + oidhash_by_value, oideq_by_value) + +KHASHL_MAP_INIT(KH_LOCAL, kh_oid_pos, oid_pos, struct object_id, int, + oidhash_by_value, oideq_by_value) + +#endif /* __AC_KHASHL_H */ diff --git a/object-store-ll.h b/object-store-ll.h index 26a3895c82..401c4beff5 100644 --- a/object-store-ll.h +++ b/object-store-ll.h @@ -160,7 +160,7 @@ struct raw_object_store { */ struct object_directory *odb; struct object_directory **odb_tail; - struct kh_odb_path_map 
*odb_by_path; + struct odb_path_map *odb_by_path; int loaded_alternates; diff --git a/object-store.h b/object-store.h index 1b3e3d7d01..3db4802e86 100644 --- a/object-store.h +++ b/object-store.h @@ -1,11 +1,12 @@ #ifndef OBJECT_STORE_H #define OBJECT_STORE_H -#include "khash.h" +#include "khashl.h" #include "dir.h" #include "object-store-ll.h" -KHASH_INIT(odb_path_map, const char * /* key: odb_path */, - struct object_directory *, 1, fspathhash, fspatheq) +KHASHL_MAP_INIT(KH_LOCAL, odb_path_map, odb_path_map, + const char * /* key: odb_path */, struct object_directory *, + fspathhash, fspatheq) #endif /* OBJECT_STORE_H */ diff --git a/oidset.h b/oidset.h index 262f4256d6..17af1b6708 100644 --- a/oidset.h +++ b/oidset.h @@ -1,7 +1,7 @@ #ifndef OIDSET_H #define OIDSET_H -#include "khash.h" +#include "khashl.h" /** * This API is similar to oid-array, in that it maintains a set of object ids diff --git a/pack-bitmap.h b/pack-bitmap.h index c7dea13217..d018365f24 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -2,7 +2,7 @@ #define PACK_BITMAP_H #include "ewah/ewok.h" -#include "khash.h" +#include "khashl.h" #include "pack.h" #include "pack-objects.h" #include "string-list.h"
In order to ease a potential migration from khash to khashl, use the kh_size() macro instead of accessing the .size field directly. Signed-off-by: Eric Wong <e@80x24.org> --- list-objects-filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/list-objects-filter.c b/list-objects-filter.c index 4346f8da45..440f112d23 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -704,7 +704,7 @@ static void filter_combine__free(void *filter_data) for (sub = 0; sub < d->nr; sub++) { list_objects_filter__free(d->sub[sub].filter); oidset_clear(&d->sub[sub].seen); - if (d->sub[sub].omits.set.size) + if (kh_size(&d->sub[sub].omits.set)) BUG("expected oidset to be cleared already"); } free(d->sub);
--- builtin/fsmonitor--daemon.c | 2 +- delta-islands.c | 2 +- khash.h | 615 +++++++++++++++++++++++------------- object-store-ll.h | 2 +- object-store.h | 5 +- 5 files changed, 406 insertions(+), 220 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 1593713f4c..c4a064201a 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -650,7 +650,7 @@ static int fsmonitor_parse_client_token(const char *buf_token, return 0; } -KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal) +KHASHL_SET_INIT(KH_LOCAL, kh_str, str, const char *, kh_hash_str, kh_eq_str) static int do_handle_client(struct fsmonitor_daemon_state *state, const char *command, diff --git a/delta-islands.c b/delta-islands.c index ee2318d45a..c31c7f75f1 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -17,7 +17,7 @@ #include "oid-array.h" #include "config.h" -KHASH_INIT(str, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal) +KHASHL_MAP_INIT(KH_LOCAL, kh_str, str, const char *, void *, kh_hash_str, kh_eq_str) static kh_oid_map_t *island_marks; static unsigned island_counter; diff --git a/khash.h b/khash.h index ff88163177..ade7614816 100644 --- a/khash.h +++ b/khash.h @@ -1,6 +1,7 @@ /* The MIT License Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk> + Copyright (c) 2019-2023 by Attractive Chaos <attractor@live.co.uk> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -23,12 +24,12 @@ SOFTWARE. 
*/ -#ifndef __AC_KHASH_H -#define __AC_KHASH_H +#ifndef __AC_KHASHL_H +#define __AC_KHASHL_H #include "hash.h" -#define AC_VERSION_KHASH_H "0.2.8" +#define AC_VERSION_KHASHL_H "0.2" typedef uint32_t khint32_t; typedef uint64_t khint64_t; @@ -36,210 +37,351 @@ typedef uint64_t khint64_t; typedef khint32_t khint_t; typedef khint_t khiter_t; -#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) -#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) -#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) -#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) -#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) -#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) -#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) +#define kh_inline inline /* portably handled elsewhere */ +#define KH_LOCAL static kh_inline MAYBE_UNUSED -#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4) +#ifndef kcalloc +#define kcalloc(N,Z) xcalloc(N,Z) +#endif +#ifndef kfree +#define kfree(P) free(P) +#endif -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +/**************************** + * Simple private functions * + ****************************/ -static inline khint_t __ac_X31_hash_string(const char *s) -{ - khint_t h = (khint_t)*s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; - return h; -} +#define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U) +#define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU)) +#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU))) -#define kh_str_hash_func(key) __ac_X31_hash_string(key) -#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) +#define __kh_fsize(m) ((m) < 32? 
1 : (m)>>5) -static const double __ac_HASH_UPPER = 0.77; +static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } -#define __KHASH_TYPE(name, khkey_t, khval_t) \ - typedef struct kh_##name { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ +/******************* + * Hash table base * + *******************/ + +#define __KHASHL_TYPE(HType, khkey_t) \ + typedef struct HType { \ + khint_t bits, count; \ + khint32_t *used; \ khkey_t *keys; \ - khval_t *vals; \ - } kh_##name##_t; - -#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ - kh_##name##_t *kh_init_##name(void); \ - void kh_destroy_##name(kh_##name##_t *h); \ - void kh_clear_##name(kh_##name##_t *h); \ - khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ - void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ - khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ - void kh_del_##name(kh_##name##_t *h, khint_t x); - -#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - SCOPE kh_##name##_t *kh_init_##name(void) { \ - return (kh_##name##_t*)xcalloc(1, sizeof(kh_##name##_t)); \ - } \ - SCOPE void kh_release_##name(kh_##name##_t *h) \ - { \ - free(h->flags); \ - free((void *)h->keys); \ - free((void *)h->vals); \ - } \ - SCOPE void kh_destroy_##name(kh_##name##_t *h) \ - { \ - if (h) { \ - kh_release_##name(h); \ - free(h); \ - } \ - } \ - SCOPE void kh_clear_##name(kh_##name##_t *h) \ - { \ - if (h && h->flags) { \ - memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ - h->size = h->n_occupied = 0; \ - } \ - } \ - SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ - { \ - if (h->n_buckets) { \ - khint_t k, i, last, mask, step = 0; \ - mask = h->n_buckets - 1; \ - k = __hash_func(key); i = k & mask; \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - i = (i 
+ (++step)) & mask; \ - if (i == last) return h->n_buckets; \ - } \ - return __ac_iseither(h->flags, i)? h->n_buckets : i; \ - } else return 0; \ - } \ - SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ - { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ - khint32_t *new_flags = NULL; \ - khint_t j = 1; \ - { \ - kroundup32(new_n_buckets); \ - if (new_n_buckets < 4) new_n_buckets = 4; \ - if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ - else { /* hash table size to be changed (shrink or expand); rehash */ \ - ALLOC_ARRAY(new_flags, __ac_fsize(new_n_buckets)); \ - memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (h->n_buckets < new_n_buckets) { /* expand */ \ - REALLOC_ARRAY(h->keys, new_n_buckets); \ - if (kh_is_map) { \ - REALLOC_ARRAY(h->vals, new_n_buckets); \ - } \ - } /* otherwise shrink */ \ - } \ - } \ - if (j) { /* rehashing is needed */ \ - for (j = 0; j != h->n_buckets; ++j) { \ - if (__ac_iseither(h->flags, j) == 0) { \ - khkey_t key = h->keys[j]; \ - khval_t val; \ - khint_t new_mask; \ - new_mask = new_n_buckets - 1; \ - if (kh_is_map) val = h->vals[j]; \ - __ac_set_isdel_true(h->flags, j); \ - while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t k, i, step = 0; \ - k = __hash_func(key); \ - i = k & new_mask; \ - while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ - __ac_set_isempty_false(new_flags, i); \ - if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ - { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ - if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ - __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ - } else { /* write the element and jump out of the loop */ \ - h->keys[i] = key; \ - if (kh_is_map) h->vals[i] = 
val; \ - break; \ - } \ - } \ - } \ - } \ - if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ - REALLOC_ARRAY(h->keys, new_n_buckets); \ - if (kh_is_map) REALLOC_ARRAY(h->vals, new_n_buckets); \ - } \ - free(h->flags); /* free the working space */ \ - h->flags = new_flags; \ - h->n_buckets = new_n_buckets; \ - h->n_occupied = h->size; \ - h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ - } \ - } \ - SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ - { \ - khint_t x; \ - if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ - if (h->n_buckets > (h->size<<1)) { \ - kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \ - } else { \ - kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ - } \ + } HType; + +#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ + extern HType *prefix##_init(void); \ + extern void prefix##_destroy(HType *h); \ + extern void prefix##_clear(HType *h); \ + extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \ + extern void prefix##_resize(HType *h, khint_t new_n_buckets); \ + extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \ + extern void prefix##_del(HType *h, khint_t k); + +#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + SCOPE HType *prefix##_init(void) { \ + return (HType*)kcalloc(1, sizeof(HType)); \ + } \ + SCOPE void prefix##_release(HType *h) { \ + kfree((void *)h->keys); kfree(h->used); \ + } \ + SCOPE void prefix##_destroy(HType *h) { \ + if (!h) return; \ + prefix##_release(h); \ + kfree(h); \ + } \ + SCOPE void prefix##_clear(HType *h) { \ + if (h && h->used) { \ + khint_t n_buckets = (khint_t)1U << h->bits; \ + memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ + h->count = 0; \ + } \ + } + +#define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_getp_core(const HType *h, const khkey_t *key, khint_t hash) { \ + 
khint_t i, last, n_buckets, mask; \ + if (h->keys == 0) return 0; \ + n_buckets = (khint_t)1U << h->bits; \ + mask = n_buckets - 1U; \ + i = last = __kh_h2b(hash, h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) return n_buckets; \ + } \ + return !__kh_used(h->used, i)? n_buckets : i; \ + } \ + SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { return prefix##_getp_core(h, key, __hash_fn(*key)); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp_core(h, &key, __hash_fn(key)); } + +#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { \ + khint32_t *new_used = 0; \ + khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ + while ((x >>= 1) != 0) ++j; \ + if (new_n_buckets & (new_n_buckets - 1)) ++j; \ + new_bits = j > 2? j : 2; \ + new_n_buckets = (khint_t)1U << new_bits; \ + if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return; /* noop, requested size is too small */ \ + new_used = (khint32_t*)kcalloc(__kh_fsize(new_n_buckets), sizeof(khint32_t)); \ + n_buckets = h->keys? 
(khint_t)1U<<h->bits : 0U; \ + if (n_buckets < new_n_buckets) { /* expand */ \ + REALLOC_ARRAY(h->keys, new_n_buckets); \ + } /* otherwise shrink */ \ + new_mask = new_n_buckets - 1; \ + for (j = 0; j != n_buckets; ++j) { \ + khkey_t key; \ + if (!__kh_used(h->used, j)) continue; \ + key = h->keys[j]; \ + __kh_set_unused(h->used, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t i; \ + i = __kh_h2b(__hash_fn(key), new_bits); \ + while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \ + __kh_set_used(new_used, i); \ + if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + break; \ + } \ + } \ + } \ + if (n_buckets > new_n_buckets) /* shrink the hash table */ \ + REALLOC_ARRAY(h->keys, new_n_buckets); \ + kfree(h->used); /* free the working space */ \ + h->used = new_used, h->bits = new_bits; \ + } + +#define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_putp_core(HType *h, const khkey_t *key, khint_t hash, int *absent) { \ + khint_t n_buckets, i, last, mask; \ + n_buckets = h->keys? 
(khint_t)1U<<h->bits : 0U; \ + *absent = -1; \ + if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \ + prefix##_resize(h, n_buckets + 1U); \ + n_buckets = (khint_t)1U<<h->bits; \ } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ - { \ - khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ - x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ - if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ - else { \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - if (__ac_isdel(h->flags, i)) site = i; \ - i = (i + (++step)) & mask; \ - if (i == last) { x = site; break; } \ - } \ - if (x == h->n_buckets) { \ - if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ - else x = i; \ - } \ - } \ - } \ - if (__ac_isempty(h->flags, x)) { /* not present at all */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; ++h->n_occupied; \ - *ret = 1; \ - } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; \ - *ret = 2; \ - } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ - return x; \ - } \ - SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ - { \ - if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ - __ac_set_isdel_true(h->flags, x); \ - --h->size; \ - } \ + mask = n_buckets - 1; \ + i = last = __kh_h2b(hash, h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) break; \ + } \ + if (!__kh_used(h->used, i)) { /* not present at all */ \ + h->keys[i] = *key; \ + __kh_set_used(h->used, i); \ + ++h->count; \ + *absent = 1; \ + } else *absent = 0; /* Don't touch h->keys[i] if present */ \ + return i; \ + } \ + SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { return prefix##_putp_core(h, key, 
__hash_fn(*key), absent); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp_core(h, &key, __hash_fn(key), absent); } + +#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ + SCOPE int prefix##_del(HType *h, khint_t i) { \ + khint_t j = i, k, mask, n_buckets; \ + if (h->keys == 0) return 0; \ + n_buckets = (khint_t)1U<<h->bits; \ + mask = n_buckets - 1U; \ + while (1) { \ + j = (j + 1U) & mask; \ + if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \ + k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ + if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ + h->keys[i] = h->keys[j], i = j; \ + } \ + __kh_set_unused(h->used, i); \ + --h->count; \ + return 1; \ + } + +#define KHASHL_DECLARE(HType, prefix, khkey_t) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_PROTOTYPES(HType, prefix, khkey_t) + +/* compatibility wrappers to make khash -> khashl migration easier */ +#define __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) \ + typedef HType HType##_t; \ + SCOPE HType *kh_init_##prefix(void) { return prefix##_init(); } \ + SCOPE void kh_release_##prefix(HType *h) { prefix##_release(h); } \ + SCOPE void kh_destroy_##prefix(HType *h) { prefix##_destroy(h); } \ + SCOPE void kh_clear_##prefix(HType *h) { prefix##_clear(h); } \ + SCOPE khint_t kh_get_##prefix(const HType *h, khkey_t key) { \ + return prefix##_get(h, key); \ + } \ + SCOPE void kh_resize_##prefix(HType *h, khint_t new_n_buckets) { \ + prefix##_resize(h, new_n_buckets); \ + } \ + SCOPE khint_t kh_put_##prefix(HType *h, khkey_t key, int *absent) { \ + return prefix##_put(h, key, absent); \ + } \ + SCOPE int kh_del_##prefix(HType *h, khint_t i) { \ + return prefix##_del(h, i); \ } -#define KHASH_DECLARE(name, khkey_t, khval_t) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_PROTOTYPES(name, khkey_t, khval_t) +#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + 
__KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) + +/*************************** + * Ensemble of hash tables * + ***************************/ + +typedef struct { + khint_t sub, pos; +} kh_ensitr_t; + +#define KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + KHASHL_INIT(KH_LOCAL, HType##_sub, prefix##_sub, khkey_t, __hash_fn, __hash_eq) \ + typedef struct HType { \ + khint64_t count:54, bits:8; \ + HType##_sub *sub; \ + } HType; \ + SCOPE HType *prefix##_init(int bits) { \ + HType *g; \ + g = (HType*)kcalloc(1, sizeof(*g)); \ + g->bits = bits; \ + g->sub = (HType##_sub*)kcalloc(1U<<bits, sizeof(*g->sub)); \ + return g; \ + } \ + SCOPE void prefix##_destroy(HType *g) { \ + int t; \ + if (!g) return; \ + for (t = 0; t < 1<<g->bits; ++t) { kfree((void*)g->sub[t].keys); kfree(g->sub[t].used); } \ + kfree(g->sub); kfree(g); \ + } \ + SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \ + khint_t hash, low, ret; \ + kh_ensitr_t r; \ + HType##_sub *h; \ + hash = __hash_fn(*key); \ + low = hash & ((1U<<g->bits) - 1); \ + h = &g->sub[low]; \ + ret = prefix##_sub_getp_core(h, key, hash); \ + if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \ + else r.sub = low, r.pos = ret; \ + return r; \ + } \ + SCOPE kh_ensitr_t prefix##_get(const HType *g, const khkey_t key) { return prefix##_getp(g, &key); } \ + SCOPE kh_ensitr_t prefix##_putp(HType *g, const khkey_t *key, int *absent) { \ + khint_t hash, low, ret; \ + kh_ensitr_t r; \ + HType##_sub *h; \ + hash = __hash_fn(*key); \ + low = hash & ((1U<<g->bits) - 1); \ + h = &g->sub[low]; \ + ret = prefix##_sub_putp_core(h, key, hash, absent); \ + if (*absent) ++g->count; \ + 
if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \ + else r.sub = low, r.pos = ret; \ + return r; \ + } \ + SCOPE kh_ensitr_t prefix##_put(HType *g, const khkey_t key, int *absent) { return prefix##_putp(g, &key, absent); } \ + SCOPE int prefix##_del(HType *g, kh_ensitr_t itr) { \ + HType##_sub *h = &g->sub[itr.sub]; \ + int ret; \ + ret = prefix##_sub_del(h, itr.pos); \ + if (ret) --g->count; \ + return ret; \ + } -#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) +/***************************** + * More convenient interface * + *****************************/ + +#define __kh_packed /* noop, we use -Werror=address-of-packed-member */ +#define __kh_cached_hash(x) ((x).hash) + +#define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \ + static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \ + SCOPE void prefix##_release(HType *h) { prefix##_s_release(h); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \ + SCOPE void prefix##_clear(HType *h) { prefix##_s_clear(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key 
= key; return prefix##_s_putp(h, &t, absent); } \ + __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + +#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ + SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_m_resize(h, new_n_buckets); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \ + __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + +#define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \ + static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key 
= key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); } + +#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \ + static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); } + +#define KHASHE_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHE_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ + SCOPE HType *prefix##_init(int bits) { return prefix##_m_init(bits); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { 
HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_m_del(h, k); } \ + SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } + +/************************** + * Public macro functions * + **************************/ + +#define kh_bucket(h, x) ((h)->keys[x]) -#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - KHASH_INIT2(name, MAYBE_UNUSED static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->count) -/* Other convenient macros... */ +#define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U) /*! @function - @abstract Test whether a bucket contains data. + @abstract Get the end iterator @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return 1 if containing data; 0 otherwise [int] + @return The end iterator [khint_t] */ -#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) +#define kh_end(h) kh_capacity(h) /*! @function @abstract Get key given an iterator @@ -247,7 +389,7 @@ static const double __ac_HASH_UPPER = 0.77; @param x Iterator to the bucket [khint_t] @return Key [type of keys] */ -#define kh_key(h, x) ((h)->keys[x]) +#define kh_key(h, x) ((h)->keys[x].key) /*! @function @abstract Get value given an iterator @@ -256,40 +398,80 @@ static const double __ac_HASH_UPPER = 0.77; @return Value [type of values] @discussion For hash sets, calling this results in segfault. */ -#define kh_val(h, x) ((h)->vals[x]) +#define kh_val(h, x) ((h)->keys[x].val) /*! @function @abstract Alias of kh_val() */ -#define kh_value(h, x) ((h)->vals[x]) +#define kh_value(h, x) kh_val(h, x) /*! 
@function - @abstract Get the start iterator + @abstract Test whether a bucket contains data. @param h Pointer to the hash table [khash_t(name)*] - @return The start iterator [khint_t] + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] */ -#define kh_begin(h) (khint_t)(0) +#define kh_exist(h, x) __kh_used((h)->used, (x)) + +#define kh_ens_key(g, x) kh_key(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_val(g, x) kh_val(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_exist(g, x) kh_exist(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_is_end(x) ((x).pos == (khint_t)-1) +#define kh_ens_size(g) ((g)->count) + +/************************************** + * Common hash and equality functions * + **************************************/ + +#define kh_eq_generic(a, b) ((a) == (b)) +#define kh_eq_str(a, b) (strcmp((a), (b)) == 0) +#define kh_hash_dummy(x) ((khint_t)(x)) + +static kh_inline khint_t kh_hash_uint32(khint_t key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} -/*! @function - @abstract Get the end iterator - @param h Pointer to the hash table [khash_t(name)*] - @return The end iterator [khint_t] - */ -#define kh_end(h) ((h)->n_buckets) +static kh_inline khint_t kh_hash_uint64(khint64_t key) { + key = ~key + (key << 21); + key = key ^ key >> 24; + key = (key + (key << 3)) + (key << 8); + key = key ^ key >> 14; + key = (key + (key << 2)) + (key << 4); + key = key ^ key >> 28; + key = key + (key << 31); + return (khint_t)key; +} -/*! 
@function - @abstract Get the number of elements in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @return Number of elements in the hash table [khint_t] - */ -#define kh_size(h) ((h)->size) +#define KH_FNV_SEED 11 + +static kh_inline khint_t kh_hash_str(const char *s) { /* FNV1a */ + khint_t h = KH_FNV_SEED ^ 2166136261U; + const unsigned char *t = (const unsigned char*)s; + for (; *t; ++t) + h ^= *t, h *= 16777619; + return h; +} + +static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) { + khint_t h = KH_FNV_SEED ^ 2166136261U; + int i; + for (i = 0; i < len; ++i) + h ^= s[i], h *= 16777619; + return h; +} /*! @function - @abstract Get the number of buckets in the hash table + @abstract Get the start iterator @param h Pointer to the hash table [khash_t(name)*] - @return Number of buckets in the hash table [khint_t] + @return The start iterator [khint_t] */ -#define kh_n_buckets(h) ((h)->n_buckets) +#define kh_begin(h) (khint_t)(0) /*! @function @abstract Iterate over the entries in the hash table @@ -329,10 +511,13 @@ static inline int oideq_by_value(struct object_id a, struct object_id b) return oideq(&a, &b); } -KHASH_INIT(oid_set, struct object_id, int, 0, oidhash_by_value, oideq_by_value) +KHASHL_SET_INIT(KH_LOCAL, kh_oid_set, oid_set, struct object_id, + oidhash_by_value, oideq_by_value) -KHASH_INIT(oid_map, struct object_id, void *, 1, oidhash_by_value, oideq_by_value) +KHASHL_MAP_INIT(KH_LOCAL, kh_oid_map, oid_map, struct object_id, void *, + oidhash_by_value, oideq_by_value) -KHASH_INIT(oid_pos, struct object_id, int, 1, oidhash_by_value, oideq_by_value) +KHASHL_MAP_INIT(KH_LOCAL, kh_oid_pos, oid_pos, struct object_id, int, + oidhash_by_value, oideq_by_value) -#endif /* __AC_KHASH_H */ +#endif /* __AC_KHASHL_H */ diff --git a/object-store-ll.h b/object-store-ll.h index 26a3895c82..401c4beff5 100644 --- a/object-store-ll.h +++ b/object-store-ll.h @@ -160,7 +160,7 @@ struct raw_object_store { */ struct 
object_directory *odb; struct object_directory **odb_tail; - struct kh_odb_path_map *odb_by_path; + struct odb_path_map *odb_by_path; int loaded_alternates; diff --git a/object-store.h b/object-store.h index 1b3e3d7d01..7917213190 100644 --- a/object-store.h +++ b/object-store.h @@ -5,7 +5,8 @@ #include "dir.h" #include "object-store-ll.h" -KHASH_INIT(odb_path_map, const char * /* key: odb_path */, - struct object_directory *, 1, fspathhash, fspatheq) +KHASHL_MAP_INIT(KH_LOCAL, odb_path_map, odb_path_map, + const char * /* key: odb_path */, struct object_directory *, + fspathhash, fspatheq) #endif /* OBJECT_STORE_H */
--- list-objects-filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/list-objects-filter.c b/list-objects-filter.c index 4346f8da45..440f112d23 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -704,7 +704,7 @@ static void filter_combine__free(void *filter_data) for (sub = 0; sub < d->nr; sub++) { list_objects_filter__free(d->sub[sub].filter); oidset_clear(&d->sub[sub].seen); - if (d->sub[sub].omits.set.size) + if (kh_size(&d->sub[sub].omits.set)) BUG("expected oidset to be cleared already"); } free(d->sub);
--- lib/PublicInbox/CodeSearch.pm | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm index cceff3c6..950f25c2 100644 --- a/lib/PublicInbox/CodeSearch.pm +++ b/lib/PublicInbox/CodeSearch.pm @@ -222,10 +222,11 @@ sub _cmt_ct { # retry_reopen cb my ($self, $cmt) = @_; my @ids = sort { $a <=> $b } $self->docids_by_postlist('Q'.$cmt); if (!@ids) { - carp "W: commit $cmt not indexed"; - return (time + 3600); + carp +"W: commit $cmt not indexed (--prune isn't exhaustive, yet)"; + return; } - scalar(@ids) == 1 or carp "BUG? `$cmt' indexed multiple times\n"; + scalar(@ids) == 1 or carp "BUG? commit $cmt indexed multiple times\n"; for my $id (@ids) { my $doc = $self->get_doc($id) or next; return int_val($doc, CT); @@ -249,6 +250,7 @@ BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir} if (@ids > 1) { @ret = uniqstr(@ret); my %ct = map { $_ => commit_ct($self, $_) } @ret; + @ret = grep { defined($ct{$_}) } @ret; @ret = sort { $ct{$a} <=> $ct{$b} } @ret ; } @ret; @@ -264,7 +266,8 @@ sub paths2roots { # for diagnostics my %ct; for my $root_oidhex (keys %$tmp) { my $paths = delete $tmp->{$root_oidhex}; - $ct{$root_oidhex} = commit_ct($self, $root_oidhex); + $ct{$root_oidhex} = commit_ct($self, $root_oidhex) // + next; push @{$ret{$_}}, $root_oidhex for @$paths; } for my $oids (values %ret) {
--- lib/PublicInbox/CodeSearchIdx.pm | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 41f6b999..244c4e8b 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -480,7 +480,9 @@ sub check_existing { # retry_reopen callback } sub partition_refs ($$$) { - my ($self, $git, $refs) = @_; # show-ref --heads --tags --hash output + my ($self, $git, $repo) = @_; # show-ref --heads --tags --hash output + my $refs = delete $repo->{refs}; + my @roots = @{$repo->{roots}}; sysseek($refs, 0, SEEK_SET); my $rfh = $git->popen(qw(rev-list --stdin), undef, { 0 => $refs }); my $seen = 0; @@ -491,7 +493,7 @@ sub partition_refs ($$$) { } @RDONLY_XDB; my $n0 = $NCHANGE; - while (defined(my $cmt = <$rfh>)) { + while (defined(my $cmt = <$rfh> // shift @roots)) { chomp $cmt; my $n = hex(substr($cmt, 0, 8)) % scalar(@RDONLY_XDB); if ($REINDEX && $REINDEX->set_maybe(pack('H*', $cmt), '')) { @@ -659,7 +661,7 @@ sub index_repo { } $repo->{roots} = \@roots; local $self->{current_info} = $git->{git_dir}; - my @shard_in = partition_refs($self, $git, delete($repo->{refs})); + my @shard_in = partition_refs($self, $git, $repo); $repo->{git_dir} = $git->{git_dir}; my $repo_ctx = $REPO_CTX = { self => $self, repo => $repo }; delete $git->{-cidx_gits_fini}; # may fire gits_fini
WIPjoin --- lib/PublicInbox/CodeSearch.pm | 52 +++++++++++++++++++++++++++++--- lib/PublicInbox/CodeSearchIdx.pm | 7 +++-- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm index e5fa4480..cceff3c6 100644 --- a/lib/PublicInbox/CodeSearch.pm +++ b/lib/PublicInbox/CodeSearch.pm @@ -10,6 +10,7 @@ use parent qw(PublicInbox::Search); use PublicInbox::Config; use PublicInbox::Search qw(retry_reopen int_val xap_terms); use PublicInbox::Compat qw(uniqstr); +use Carp qw(carp); use Compress::Zlib qw(uncompress); use constant { AT => 0, # author time YYYYMMDDHHMMSS, dt: for mail) @@ -217,32 +218,74 @@ BUG: (non-fatal) $git_dir indexed multiple times in $self->{topdir} @ids; } +sub _cmt_ct { # retry_reopen cb + my ($self, $cmt) = @_; + my @ids = sort { $a <=> $b } $self->docids_by_postlist('Q'.$cmt); + if (!@ids) { + carp "W: commit $cmt not indexed"; + return (time + 3600); + } + scalar(@ids) == 1 or carp "BUG? `$cmt' indexed multiple times\n"; + for my $id (@ids) { + my $doc = $self->get_doc($id) or next; + return int_val($doc, CT); + } + carp "W: commit $cmt unindexed/gone(?) (ids: @ids)\n"; + undef; +} + +# returns the commit time of a given commit OID +sub commit_ct ($$) { + my ($self, $cmt) = @_; + retry_reopen($self, \&_cmt_ct, $cmt); +} + sub root_oids ($$) { my ($self, $git_dir) = @_; my @ids = docids_of_git_dir $self, $git_dir or warn <<""; BUG? 
(non-fatal) `$git_dir' not indexed in $self->{topdir} my @ret = map { xap_terms('G', $self->xdb, $_) } @ids; - @ret = uniqstr(@ret) if @ids > 1; + if (@ids > 1) { + @ret = uniqstr(@ret); + my %ct = map { $_ => commit_ct($self, $_) } @ret; + @ret = sort { $ct{$a} <=> $ct{$b} } @ret ; + } @ret; } -sub paths2roots { +sub paths2roots { # for diagnostics my ($self, $paths) = @_; my %ret; if ($paths) { for my $p (keys %$paths) { @{$ret{$p}} = root_oids($self, $p) } } else { my $tmp = roots2paths($self); + my %ct; for my $root_oidhex (keys %$tmp) { my $paths = delete $tmp->{$root_oidhex}; + $ct{$root_oidhex} = commit_ct($self, $root_oidhex); push @{$ret{$_}}, $root_oidhex for @$paths; } - @$_ = sort(@$_) for values %ret; + for my $oids (values %ret) { + # sort OIDs by commit time ascending + @$oids = sort { $ct{$a} <=> $ct{$b} } @$oids; + } } \%ret; } +sub base2roots { # for diagnostics + my ($self, $paths) = @_; + my $tmp = paths2roots($self, $paths); + my $ret = {}; + while (my ($git_dir, $roots) = each %$tmp) { + my $bn = substr($git_dir, rindex($git_dir, '/') + 1); + ++$ret->{$bn}->{$_} for @$roots; + } + $ret; +} + sub load_ct { # retry_reopen cb my ($self, $git_dir) = @_; my @ids = docids_of_git_dir $self, $git_dir or return; @@ -252,6 +295,7 @@ sub load_ct { # retry_reopen cb } } +# this is for git repos, not individual commits sub load_commit_times { # each_cindex callback my ($self, $todo) = @_; # todo = [ [ time, git ], [ time, git ] ...] 
my (@pending, $rec, $ct); @@ -366,7 +410,7 @@ sub repos_sorted { my @recs = map { [ 0, $_ ] } @_; # PublicInbox::Git objects my @todo = @recs; $pi_cfg->each_cindex(\&load_commit_times, \@todo); - @recs = sort { $b->[0] <=> $a->[0] } @recs; # sort by commit time + @recs = sort { $b->[0] <=> $a->[0] } @recs; # sort by repo commit time } 1; diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 570ff64f..41f6b999 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -1250,10 +1250,13 @@ sub show_json { # for diagnostics (unstable output) my %ret; my @todo = @$s; while (defined(my $f = shift @todo)) { - if ($f =~ /\A(?:roots2paths|paths2roots|join_data)\z/) { + if ($f =~ /,/) { + push @todo, split(/,/, $f); + } elsif ($f =~ /\A(?:roots2paths|paths2roots|join_data| + base2roots)\z/x) { $ret{$f} = $self->$f; } elsif ($f eq '') { # default --show (no args) - push @todo, qw(roots2paths join_data); + push @todo, qw(base2roots join_data); } else { warn "E: cannot show `$f'\n"; }
--- sv.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/sv.c b/sv.c index 9c7f3ba..77b679e 100644 --- a/sv.c +++ b/sv.c @@ -297,6 +297,44 @@ Public API: ++PL_sv_count; \ } STMT_END +#define TRACE_ARENA 1 +#if TRACE_ARENA +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +static time_t trace_start = -1; +static int trace_arena_alloc; + +static void __attribute__((constructor)) init_arena_trace(void) +{ + const char *t = getenv("TRACE_ARENA"); + if (t && *t) + trace_arena_alloc = 1; +} + +static void trace_arena(const svtype sv_type, size_t req, size_t actual) +{ + struct timespec ts; + + if (!trace_arena_alloc) + return; + + (void)clock_gettime(CLOCK_MONOTONIC, &ts); + if (trace_start < 0) { + trace_start = ts.tv_sec + 10; + } else if (trace_start > 0) { + if (ts.tv_sec > trace_start) + trace_start = 0; + } + if (!trace_start) + dprintf(2, "%ld %u %zu -> %zu\n", + ts.tv_sec, sv_type, req, actual); +} +#else /* !TRACE_ARENA */ +static void trace_arena(const svtype sv_type, size_t req, size_t actual) +{ +} +#endif /* TRACE_ARENA */ /* make some more SVs by adding another arena */ @@ -306,6 +344,7 @@ S_more_sv(pTHX) SV* sv; char *chunk; /* must use New here to match call to */ Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */ + trace_arena(SVt_NULL, 0, PERL_ARENA_SIZE); sv_add_arena(chunk, PERL_ARENA_SIZE, 0); uproot_SV(sv); return sv; @@ -1102,6 +1141,7 @@ Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size, assert (bodies_by_type[i].type == i); } #endif + trace_arena(sv_type, arena_size, good_arena_size); assert(arena_size);
--- sv.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/sv.c b/sv.c index 9c7f3ba..a04e1dc 100644 --- a/sv.c +++ b/sv.c @@ -297,6 +297,44 @@ Public API: ++PL_sv_count; \ } STMT_END +#define TRACE_ARENA 1 +#if TRACE_ARENA +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +static time_t trace_start = -1; +static int trace_arena_alloc; + +static void __attribute__((constructor)) init_arena_trace(void) +{ + const char *t = getenv("TRACE_ARENA"); + if (t && *t) + trace_arena_alloc = 1; +} + +static void trace_arena(const svtype sv_type, size_t req, size_t actual) +{ + struct timespec ts; + + if (!trace_arena_alloc) + return; + + (void)clock_gettime(CLOCK_MONOTONIC, &ts); + if (trace_start < 0) { + trace_start = ts.tv_sec + 10; + } else if (trace_start > 0) { + if (ts.tv_sec > trace_start) + trace_start = 0; + } + if (!trace_start) + dprintf(2, "%ld %u %zu -> %zu\n", + ts.tv_sec, sv_type, req, actual); +} +#else /* !TRACE_ARENA */ +static void trace_arena(const svtype sv_type, size_t req, size_t actual) +{ +} +#endif /* TRACE_ARENA */ /* make some more SVs by adding another arena */ @@ -306,6 +344,7 @@ S_more_sv(pTHX) SV* sv; char *chunk; /* must use New here to match call to */ Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */ + trace_arena(SVt_NULL, 0, PERL_ARENA_SIZE); sv_add_arena(chunk, PERL_ARENA_SIZE, 0); uproot_SV(sv); return sv; @@ -1091,6 +1130,8 @@ Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size, #if defined(DEBUGGING) && !defined(PERL_GLOBAL_STRUCT) static bool done_sanity_check; + trace_arena(sv_type, arena_size, good_arena_size); + /* PERL_GLOBAL_STRUCT cannot coexist with global * variables like done_sanity_check. */ if (!done_sanity_check) {
If publicinbox.cgitrc is set in the config file, we'll ensure cgit sees it as CGIT_CONFIG since the configured publicinbox.cgitrc knob may not be the default path the cgit.cgi binary was configured to use. Furthermore, we'll respect CGIT_CONFIG in the environment if publicinbox.cgitrc is unset in the config file at -httpd/-netd startup. --- lib/PublicInbox/Cgit.pm | 12 +++++------- lib/PublicInbox/Config.pm | 3 ++- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm index 10cad57a..78fc9ca0 100644 --- a/lib/PublicInbox/Cgit.pm +++ b/lib/PublicInbox/Cgit.pm @@ -58,6 +58,7 @@ sub new { cmd => [ $cgit_bin ], cgit_data => $cgit_data, pi_cfg => $pi_cfg, + cgitrc => $pi_cfg->{'publicinbox.cgitrc'} // $ENV{CGIT_CONFIG}, }, $class; # some cgit repos may not be mapped to inboxes, so ensure those exist: @@ -100,15 +101,12 @@ sub call { return PublicInbox::WwwStatic::response($env, [], $f); } - my $cgi_env = { PATH_INFO => $path_info }; - foreach (@PASS_ENV) { - my $v = $env->{$_} // next; - $cgi_env->{$_} = $v; - } - $cgi_env->{'HTTPS'} = 'on' if $env->{'psgi.url_scheme'} eq 'https'; + my %cgi_env = (CGIT_CONFIG => $self->{cgitrc}, PATH_INFO => $path_info); + @cgi_env{@PASS_ENV} = @$env{@PASS_ENV}; # spawn ignores undef vals + $cgi_env{HTTPS} = 'on' if $env->{'psgi.url_scheme'} eq 'https'; my $rdr = input_prepare($env) or return r(500); - my $qsp = PublicInbox::Qspawn->new($self->{cmd}, $cgi_env, $rdr); + my $qsp = PublicInbox::Qspawn->new($self->{cmd}, \%cgi_env, $rdr); my $limiter = $self->{pi_cfg}->limiter('-cgit'); $qsp->psgi_yield($env, $limiter, $parse_cgi_headers, $ctx); } diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index b8d3c485..607197f6 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -305,7 +305,8 @@ sub apply_cgit_scan_path { sub parse_cgitrc { my ($self, $cgitrc, $nesting) = @_; - $cgitrc //= $self->{'publicinbox.cgitrc'} // return; + $cgitrc //= 
$self->{'publicinbox.cgitrc'} // + $ENV{CGIT_CONFIG} // return; if ($nesting == 0) { # defaults: my %s = map { $_ => 1 } qw(/cgit.css /cgit.png
--- Documentation/public-inbox-config.pod | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod index b875df58..d3f5d04b 100644 --- a/Documentation/public-inbox-config.pod +++ b/Documentation/public-inbox-config.pod @@ -356,7 +356,9 @@ is set and the C<cgit> binary exists). See L<public-inbox-edit(1)> =item publicinbox.indexMaxSize + =item publicinbox.indexBatchSize + =item publicinbox.indexSequentialShard See L<public-inbox-index(1)>
It'll probably be done for another release, I doubt most cgit users are willing to completely replace it with our coderepo viewer just yet... --- Documentation/public-inbox-config.pod | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod index f533ea9d..b875df58 100644 --- a/Documentation/public-inbox-config.pod +++ b/Documentation/public-inbox-config.pod @@ -337,11 +337,15 @@ Try using C<cgit> as the first choice, this is the default. Fall back to using C<cgit> only if our native, inbox-aware git code repository viewer doesn't recognize the URL. +=begin comment +=for comment rewrite is not yet implemented =item * rewrite Rewrite C<cgit> URLs for our native, inbox-aware code repository viewer. This implies C<fallback> for URLs the native viewer does not recognize. +=end comment + =back Default: C<first> (C<cgit> will be used iff C<publicinbox.cgitrc>
--- lib/PublicInbox/ViewVCS.pm | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index f755bf1e..7e13312e 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -28,7 +28,7 @@ use PublicInbox::Eml; use Text::Wrap qw(wrap); use PublicInbox::Hval qw(ascii_html to_filename prurl utf8_maybe); use POSIX qw(strftime); -use autodie qw(open seek); +use autodie qw(open seek truncate); use Fcntl qw(SEEK_SET); my $hl = eval { require PublicInbox::HlMod; @@ -158,7 +158,14 @@ sub cmt_hdr_prep { # psgi_qx cb utf8_maybe($buf); # non-UTF-8 commits exist chomp $buf; (my $P, my $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9); + truncate $fh, 0; return unless $P; + seek $fh, 0, SEEK_SET; + my $qsp_p = PublicInbox::Qspawn->new($ctx->{git}->cmd(qw(show + --encoding=UTF-8 --pretty=format:%n -M --stat -p), $ctx->{oid}), + undef, { 1 => $fh }); + $qsp_p->{qsp_err} = \($ctx->{-qsp_err_p} = ''); + $qsp_p->psgi_qx($ctx->{env}, undef, \&cmt_patch_prep, $ctx, $cmt_fin); @{$ctx->{-cmt_P}} = split / /, $P; @{$ctx->{-cmt_p}} = split / /, $p; # abbreviated do_cat_async([$ctx, $cmt_fin], \&cmt_title, @{$ctx->{-cmt_P}}); @@ -372,24 +379,15 @@ sub show_commit ($$) { # patch-id needs two passes, and we use the initial show to ensure # a patch embedded inside the commit message body doesn't get fed # to patch-id: - my $genv = { GIT_DIR => $git->{git_dir} }; - my $dir = "$ctx->{-tmp}"; - my ($opt_h, $opt_p) = ({ -C => $dir }, { -C => $dir }); - open $opt_h->{1}, '+>', "$dir/h"; - open $opt_p->{1}, '+>', "$dir/p"; - $ctx->{patch_fh} = $opt_p->{1}; + open $ctx->{patch_fh}, '+>', "$ctx->{-tmp}/show"; my $qsp_h = PublicInbox::Qspawn->new($git->cmd('show', $SHOW_FMT, - qw(--encoding=UTF-8 -z --no-notes --no-patch), $oid), - $genv, $opt_h); - my $qsp_p = PublicInbox::Qspawn->new($git->cmd(qw(show - --encoding=UTF-8 --pretty=format:%n -M --stat -p), $oid), - $genv, $opt_p); + 
qw(--encoding=UTF-8 -z --no-notes --no-patch), $oid), + undef, { 1 => $ctx->{patch_fh} }); $qsp_h->{qsp_err} = \($ctx->{-qsp_err_h} = ''); - $qsp_p->{qsp_err} = \($ctx->{-qsp_err_p} = ''); my $cmt_fin = PublicInbox::OnDestroy->new($$, \&cmt_fin, $ctx); $ctx->{git} = $git; + $ctx->{oid} = $oid; $qsp_h->psgi_qx($ctx->{env}, undef, \&cmt_hdr_prep, $ctx, $cmt_fin); - $qsp_p->psgi_qx($ctx->{env}, undef, \&cmt_patch_prep, $ctx, $cmt_fin); } sub show_other ($$) { # just in case...
--- lib/PublicInbox/CodeSearch.pm | 16 +++++++++++++++- lib/PublicInbox/CodeSearchIdx.pm | 5 +++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm index 1f95a726..96732c79 100644 --- a/lib/PublicInbox/CodeSearch.pm +++ b/lib/PublicInbox/CodeSearch.pm @@ -227,7 +227,7 @@ BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir} @ret; } -sub paths2roots { +sub paths2roots { # for diagnostics my ($self, $paths) = @_; my %ret; if ($paths) { @@ -243,6 +243,20 @@ sub paths2roots { \%ret; } +sub basename_roots { # for diagnostics + my ($self, $paths) = @_; + my $tmp = paths2roots($self, $paths); + my $ret = {}; + while (my ($git_dir, $roots) = each %$tmp) { + my $bn = substr($git_dir, rindex($git_dir, '/') + 1); + ++$ret->{$bn}->{$_} for @$roots; + } + # for my $bn (keys %$ret) { + # $ret->{$bn} + # } + $ret; +} + sub load_ct { # retry_reopen cb my ($self, $git_dir) = @_; my @ids = docids_of_git_dir $self, $git_dir or return; diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 570ff64f..91c0146b 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -1250,10 +1250,11 @@ sub show_json { # for diagnostics (unstable output) my %ret; my @todo = @$s; while (defined(my $f = shift @todo)) { - if ($f =~ /\A(?:roots2paths|paths2roots|join_data)\z/) { + if ($f =~ /\A(?:roots2paths|paths2roots|join_data| + basename_roots)\z/x) { $ret{$f} = $self->$f; } elsif ($f eq '') { # default --show (no args) - push @todo, qw(roots2paths join_data); + push @todo, qw(basename_roots join_data); } else { warn "E: cannot show `$f'\n"; }