From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 498A81F406 for ; Sat, 19 Aug 2023 21:39:43 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1692481183; bh=LzMe7Uefozpa5uLdMLNQum9OOGtCZSYH8vths+NL/T0=; h=From:To:Subject:Date:From; b=2oteirfy14Ypc+2RLDW8C/DWbUssqThVqMappYrmVRxaEyPp9G/b6/NOZ2AekQAWE R0SwemnFF1FjL+7jbwmPV4/Bo0oOvPa06ceSdskf4o+yyu+M7jwPS3sgzHyrQIe1/P Htrkseh6TDIoJlK3bqWFZ9OZ4ZLfFO77mP05XsXk= From: Eric Wong To: spew@80x24.org Subject: [PATCH] cindex: add --show-roots switch Date: Sat, 19 Aug 2023 21:39:43 +0000 Message-Id: <20230819213943.3479224-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This adds a --show-roots switch for diagnostics: at the end of a cindex run, it lists every commit ID recorded under the `G' term prefix, each followed by the `P' terms (printed as paths) of the documents it appears in. This aids in debugging and getting things to work. 
--- lib/PublicInbox/CodeSearchIdx.pm | 32 ++++++++++++++++++++++++++++++++ script/public-inbox-cindex | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 5039e09e..c3d4da1c 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -1153,6 +1153,37 @@ sub _prep_ibx { # each_inbox callback push @IBX, $ibx; } +sub show_roots { # for diagnostics + my ($self) = @_; + local $self->{xdb}; + my $cur = $self->xdb->allterms_begin('G'); + my $end = $self->{xdb}->allterms_end('G'); + my $qrepo = $PublicInbox::Search::X{Query}->new('T'.'r'); + my $enq = $PublicInbox::Search::X{Enquire}->new($self->{xdb}); + $enq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new); + $enq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING); + for (; $cur != $end; $cur++) { + my $G_oidhex = $cur->get_termname; + my $qry = $PublicInbox::Search::X{Query}->new( + PublicInbox::Search::OP_FILTER(), + $qrepo, $G_oidhex); + $enq->set_query($qry); + my ($off, $lim) = (0, 10000); + say 'commit ',substr($G_oidhex, 1), ' appears in:'; + while (1) { + my $mset = $enq->get_mset($off, $lim); + my $size = $mset->size or last; + for my $x ($mset->items) { + my $doc = $x->get_document; + for (xap_terms('P', $x->get_document)) { + say '- /', substr($_, 1); + } + } + $off += $size; + } + } +} + sub cidx_run { # main entry point my ($self) = @_; my $restore_umask = prep_umask($self); @@ -1244,6 +1275,7 @@ sub cidx_run { # main entry point PublicInbox::DS->Reset; warn "$$ OK DONE"; $self->lock_release(!!$NCHANGE); + show_roots($self) if $self->{-opt}->{'show-roots'} # for diagnostics } sub ipc_atfork_child { # @IDX_SHARDS diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex index 888c8b10..0526434c 100755 --- a/script/public-inbox-cindex +++ b/script/public-inbox-cindex @@ -29,7 +29,7 @@ GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! 
dangerous indexlevel|index-level|L=s associate associate-max=i associate-date-range=s associate-prefixes=s@ batch_size|batch-size=s max_size|max-size=s - include|I=s@ only=s@ all + include|I=s@ only=s@ all show-roots project-list=s exclude=s@ sort-parallel=s sort-compress-program=s sort-buffer-size=s d=s update|u scan! prune dry-run|n C=s@ help|h))