From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A61571F406 for ; Fri, 8 Dec 2023 10:23:45 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1702031025; bh=wxVt1T330xgW/OhsR9o+jrPbiyxRITGW4KFSrJSmbrw=; h=From:To:Subject:Date:From; b=MlJ4+ljfqNDj6f5sWMVXPp5hwdnv/LrZT2kIw1OutNE1eynRhDC93t5uvQcjz7nRt WrulouQAiEU77v/5kwXCvOUS8azrxNZUqbLIHAzV8U3lT5vMdUx6Ka6xZlI20z7Qw2 YCj97Pi9vdrl4X9w2QEGwehkW8SAm25yPcpOdWro= From: Eric Wong To: spew@80x24.org Subject: [PATCH] cindex: parallel check_existing Date: Fri, 8 Dec 2023 10:23:45 +0000 Message-ID: <20231208102345.3821268-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: No speedup measurable, unfortunately... --- lib/PublicInbox/CodeSearchIdx.pm | 46 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 5d420de2..d76ede5f 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -444,39 +444,42 @@ sub fp_async_done { # run_git cb from worker my ($opt, $self, $git, $op_p) = @_; my $refs = delete $opt->{1} // 'BUG: no {-repo}->{refs}'; sysseek($refs, 0, SEEK_SET); - send($op_p, 'fp_done '.sha_all(256, $refs)->hexdigest, 0); + my $fp = sha_all(256, $refs)->hexdigest; + @RDONLY_XDB = $self->xdb_shards_flat if !@RDONLY_XDB; + my $n = git_dir_hash($git->{git_dir}) % scalar(@RDONLY_XDB); + my $shard = bless { %$self, shard => $n }, ref($self); + $shard->{xdb} = $RDONLY_XDB[$n] // die "BUG: shard[$n] undef"; + my ($docid, @to_delete) = $shard->retry_reopen(\&check_existing, + $self, $git, $fp); + send($op_p, "fp_done $n $docid $fp @to_delete", 0); } sub fp_done { # called parent via PktOp by fp_async_done - my ($self, $git, $next_on_err, $hex) = @_; + my ($self, $git, $next_on_err, $n, $docid, $fp, @to_delete) = @_; $next_on_err->cancel; return if $DO_QUIT; - $git->{-repo}->{fp} = $hex; - my $n = git_dir_hash($git->{git_dir}) % scalar(@RDONLY_XDB); - my $shard = bless { %$self, shard => $n }, ref($self); + if ($docid == 0) { # no change + delete $git->{-repo}; + return index_next($self); + } $git->{-repo}->{shard_n} = $n; - delete @$shard{qw(lockfh lock_path)}; - local $shard->{xdb} = $RDONLY_XDB[$n] // die "BUG: shard[$n] undef"; - $shard->retry_reopen(\&check_existing, $self, $git); + $git->{-repo}->{fp} = $fp; + $git->{-repo}->{docid} = $docid if $docid > 0; + if (@to_delete) { + warn "BUG: $git->{git_dir} indexed multiple times, culling\n"; + $git->{-repo}->{to_delete} = \@to_delete; # XXX needed? + } + prep_repo($self, $git); } sub check_existing { # retry_reopen callback - my ($shard, $self, $git) = @_; + my ($shard, $self, $git, $fp) = @_; my @docids = $shard->docids_of_git_dir($git->{git_dir}); - my $docid = shift(@docids) // return prep_repo($self, $git); # new repo + my $docid = shift(@docids) // return (-1); # new repo my $doc = $shard->get_doc($docid) // die "BUG: no #$docid ($git->{git_dir})"; my $old_fp = $REINDEX ? "\0invalid" : $doc->get_data; - if ($old_fp eq $git->{-repo}->{fp}) { # no change - delete $git->{-repo}; - return index_next($self); - } - $git->{-repo}->{docid} = $docid; - if (@docids) { - warn "BUG: $git->{git_dir} indexed multiple times, culling\n"; - $git->{-repo}->{to_delete} = \@docids; # XXX needed? - } - prep_repo($self, $git); + $old_fp eq $fp ? (0) : ($docid, @docids); } sub partition_refs ($$$) { @@ -1307,6 +1310,7 @@ sub cidx_run { # main entry point } init_join_prefork($self) } + $REINDEX = $self->{-opt}->{reindex}; local @IDX_SHARDS = cidx_init($self); # forks workers local $ANY_SHARD = -1; local $self->{current_info} = ''; @@ -1324,7 +1328,7 @@ sub cidx_run { # main entry point $cb->($m, @_); }; load_existing($self) unless $self->{-cidx_internal}; - if ($self->{-opt}->{reindex}) { + if ($REINDEX) { require PublicInbox::SharedKV; $REINDEX = PublicInbox::SharedKV->new; delete $REINDEX->{lock_path};