From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] cindex: add --show-roots switch
Date: Sat, 19 Aug 2023 21:39:43 +0000 [thread overview]
Message-ID: <20230819213943.3479224-1-e@80x24.org> (raw)
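Add a diagnostic --show-roots switch to public-inbox-cindex.  When given,
each indexed root commit is printed after indexing completes, followed by
the repositories it appears in.  This aids in getting things to work.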
---
lib/PublicInbox/CodeSearchIdx.pm | 32 ++++++++++++++++++++++++++++++++
script/public-inbox-cindex | 2 +-
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 5039e09e..c3d4da1c 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -1153,6 +1153,37 @@ sub _prep_ibx { # each_inbox callback
push @IBX, $ibx;
}
+sub show_roots { # for diagnostics: list repos matching each 'G' term
+	my ($self) = @_;
+	local $self->{xdb}; # restore the caller's {xdb} slot on return
+	my $cur = $self->xdb->allterms_begin('G');
+	my $end = $self->{xdb}->allterms_end('G');
+	my $qrepo = $PublicInbox::Search::X{Query}->new('T'.'r');
+	my $enq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
+	$enq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
+	$enq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
+	for (; $cur != $end; $cur++) {
+		my $G_oidhex = $cur->get_termname;
+		my $qry = $PublicInbox::Search::X{Query}->new(
+				PublicInbox::Search::OP_FILTER(),
+				$qrepo, $G_oidhex);
+		$enq->set_query($qry);
+		my ($off, $lim) = (0, 10000);
+		say 'commit ',substr($G_oidhex, 1), ' appears in:';
+		while (1) { # page through matches, $lim documents at a time
+			my $mset = $enq->get_mset($off, $lim);
+			my $size = $mset->size or last;
+			for my $x ($mset->items) {
+				my $doc = $x->get_document; # fetch once, reuse
+				for (xap_terms('P', $doc)) {
+					say '- /', substr($_, 1);
+				}
+			}
+			$off += $size;
+		}
+	}
+}
+
sub cidx_run { # main entry point
my ($self) = @_;
my $restore_umask = prep_umask($self);
@@ -1244,6 +1275,7 @@ sub cidx_run { # main entry point
PublicInbox::DS->Reset;
warn "$$ OK DONE";
$self->lock_release(!!$NCHANGE);
+ show_roots($self) if $self->{-opt}->{'show-roots'} # for diagnostics
}
sub ipc_atfork_child { # @IDX_SHARDS
diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex
index 888c8b10..0526434c 100755
--- a/script/public-inbox-cindex
+++ b/script/public-inbox-cindex
@@ -29,7 +29,7 @@ GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous
indexlevel|index-level|L=s associate associate-max=i
associate-date-range=s associate-prefixes=s@
batch_size|batch-size=s max_size|max-size=s
- include|I=s@ only=s@ all
+ include|I=s@ only=s@ all show-roots
project-list=s exclude=s@
sort-parallel=s sort-compress-program=s sort-buffer-size=s
d=s update|u scan! prune dry-run|n C=s@ help|h))
reply other threads:[~2023-08-19 21:39 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230819213943.3479224-1-e@80x24.org \
--to=e@80x24.org \
--cc=spew@80x24.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).