dumping ground for random patches and texts
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] WIP-join
Date: Wed, 20 Mar 2024 06:11:14 +0000	[thread overview]
Message-ID: <20240320061114.2375503-1-e@80x24.org> (raw)

WIPjoin
---
 lib/PublicInbox/CodeSearch.pm    | 52 +++++++++++++++++++++++++++++---
 lib/PublicInbox/CodeSearchIdx.pm |  7 +++--
 2 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm
index e5fa4480..cceff3c6 100644
--- a/lib/PublicInbox/CodeSearch.pm
+++ b/lib/PublicInbox/CodeSearch.pm
@@ -10,6 +10,7 @@ use parent qw(PublicInbox::Search);
 use PublicInbox::Config;
 use PublicInbox::Search qw(retry_reopen int_val xap_terms);
 use PublicInbox::Compat qw(uniqstr);
+use Carp qw(carp);
 use Compress::Zlib qw(uncompress);
 use constant {
 	AT => 0, # author time YYYYMMDDHHMMSS, dt: for mail)
@@ -217,32 +218,74 @@ BUG: (non-fatal) $git_dir indexed multiple times in $self->{topdir}
 	@ids;
 }
 
+sub _cmt_ct { # retry_reopen cb
+	my ($self, $cmt) = @_;
+	my @ids = sort { $a <=> $b } $self->docids_by_postlist('Q'.$cmt);
+	if (!@ids) {
+		carp "W: commit $cmt not indexed";
+		return (time + 3600);
+	}
+	scalar(@ids) == 1 or carp "BUG? `$cmt' indexed multiple times\n";
+	for my $id (@ids) {
+		my $doc = $self->get_doc($id) or next;
+		return int_val($doc, CT);
+	}
+	carp "W: commit $cmt unindexed/gone(?) (ids: @ids)\n";
+	undef;
+}
+
+# returns the commit time of a given commit OID
+sub commit_ct ($$) {
+	my ($self, $cmt) = @_;
+	retry_reopen($self, \&_cmt_ct, $cmt);
+}
+
 sub root_oids ($$) {
 	my ($self, $git_dir) = @_;
 	my @ids = docids_of_git_dir $self, $git_dir or warn <<"";
 BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir}
 
 	my @ret = map { xap_terms('G', $self->xdb, $_) } @ids;
-	@ret = uniqstr(@ret) if @ids > 1;
+	if (@ids > 1) {
+		@ret = uniqstr(@ret);
+		my %ct = map { $_ => commit_ct($self, $_) } @ret;
+		@ret = sort { $ct{$a} <=> $ct{$b} } @ret ;
+	}
 	@ret;
 }
 
-sub paths2roots {
+sub paths2roots { # for diagnostics
 	my ($self, $paths) = @_;
 	my %ret;
 	if ($paths) {
 		for my $p (keys %$paths) { @{$ret{$p}} = root_oids($self, $p) }
 	} else {
 		my $tmp = roots2paths($self);
+		my %ct;
 		for my $root_oidhex (keys %$tmp) {
 			my $paths = delete $tmp->{$root_oidhex};
+			$ct{$root_oidhex} = commit_ct($self, $root_oidhex);
 			push @{$ret{$_}}, $root_oidhex for @$paths;
 		}
-		@$_ = sort(@$_) for values %ret;
+		for my $oids (values %ret) {
+			# sort OIDs by commit time ascending
+			@$oids = sort { $ct{$a} <=> $ct{$b} } @$oids;
+		}
 	}
 	\%ret;
 }
 
+sub base2roots { # for diagnostics
+	my ($self, $paths) = @_;
+	my $tmp = paths2roots($self, $paths);
+	my $ret = {};
+	while (my ($git_dir, $roots) = each %$tmp) {
+		my $bn = substr($git_dir, rindex($git_dir, '/') + 1);
+		++$ret->{$bn}->{$_} for @$roots;
+	}
+	$ret;
+}
+
 sub load_ct { # retry_reopen cb
 	my ($self, $git_dir) = @_;
 	my @ids = docids_of_git_dir $self, $git_dir or return;
@@ -252,6 +295,7 @@ sub load_ct { # retry_reopen cb
 	}
 }
 
+# this is for git repos, not individual commits
 sub load_commit_times { # each_cindex callback
 	my ($self, $todo) = @_; # todo = [ [ time, git ], [ time, git ] ...]
 	my (@pending, $rec, $ct);
@@ -366,7 +410,7 @@ sub repos_sorted {
 	my @recs = map { [ 0, $_ ] } @_; # PublicInbox::Git objects
 	my @todo = @recs;
 	$pi_cfg->each_cindex(\&load_commit_times, \@todo);
-	@recs = sort { $b->[0] <=> $a->[0] } @recs; # sort by commit time
+	@recs = sort { $b->[0] <=> $a->[0] } @recs; # sort by repo commit time
 }
 
 1;
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 570ff64f..41f6b999 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -1250,10 +1250,13 @@ sub show_json { # for diagnostics (unstable output)
 	my %ret;
 	my @todo = @$s;
 	while (defined(my $f = shift @todo)) {
-		if ($f =~ /\A(?:roots2paths|paths2roots|join_data)\z/) {
+		if ($f =~ /,/) {
+			push @todo, split(/,/, $f);
+		} elsif ($f =~ /\A(?:roots2paths|paths2roots|join_data|
+				base2roots)\z/x) {
 			$ret{$f} = $self->$f;
 		} elsif ($f eq '') { # default --show (no args)
-			push @todo, qw(roots2paths join_data);
+			push @todo, qw(base2roots join_data);
 		} else {
 			warn "E: cannot show `$f'\n";
 		}

             reply	other threads:[~2024-03-20  6:11 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-20  6:11 Eric Wong [this message]
  -- strict thread matches above, loose matches on Subject: below --
2024-02-12 11:02 [PATCH] WIP-join Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240320061114.2375503-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=spew@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).