From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 882951F44D for ; Wed, 20 Mar 2024 06:11:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1710915074; bh=QxQHjkmtfFbrZEJ7M77i7PSK6w7yz8SU2Et9/2400ic=; h=From:To:Subject:Date:From; b=eGRVtNnh9jSZ1d862Dp+CfznbYY1XGSdVEpMJHUrkaQfWvIwLGftCi1ocpWUUOBZg nbDiy9mxvjIf0jpmS5qunHvZIQ1coj1w5utyljAVq8EJiQ+8Wqs51kWH67dCX+o9+w KXqyRf9ZdL2q0xyrVlAl62Rh2oTx63pJFUMFcKXM= From: Eric Wong To: spew@80x24.org Subject: [PATCH] WIP-join Date: Wed, 20 Mar 2024 06:11:14 +0000 Message-ID: <20240320061114.2375503-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: WIPjoin --- lib/PublicInbox/CodeSearch.pm | 52 +++++++++++++++++++++++++++++--- lib/PublicInbox/CodeSearchIdx.pm | 7 +++-- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm index e5fa4480..cceff3c6 100644 --- a/lib/PublicInbox/CodeSearch.pm +++ b/lib/PublicInbox/CodeSearch.pm @@ -10,6 +10,7 @@ use parent qw(PublicInbox::Search); use PublicInbox::Config; use PublicInbox::Search qw(retry_reopen int_val xap_terms); use PublicInbox::Compat qw(uniqstr); +use Carp qw(carp); use Compress::Zlib qw(uncompress); use constant { AT => 0, # author time YYYYMMDDHHMMSS, dt: for mail) @@ -217,32 +218,74 @@ BUG: (non-fatal) $git_dir indexed multiple times in $self->{topdir} @ids; } +sub _cmt_ct { # retry_reopen cb + my ($self, $cmt) = @_; + my @ids = sort { $a <=> $b } $self->docids_by_postlist('Q'.$cmt); + if (!@ids) { + carp "W: commit $cmt not indexed"; + return (time + 3600); + } + scalar(@ids) == 1 or carp "BUG? `$cmt' indexed multiple times\n"; + for my $id (@ids) { + my $doc = $self->get_doc($id) or next; + return int_val($doc, CT); + } + carp "W: commit $cmt unindexed/gone(?) (ids: @ids)\n"; + undef; +} + +# returns the commit time of a given commit OID +sub commit_ct ($$) { + my ($self, $cmt) = @_; + retry_reopen($self, \&_cmt_ct, $cmt); +} + sub root_oids ($$) { my ($self, $git_dir) = @_; my @ids = docids_of_git_dir $self, $git_dir or warn <<""; BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir} my @ret = map { xap_terms('G', $self->xdb, $_) } @ids; - @ret = uniqstr(@ret) if @ids > 1; + if (@ids > 1) { + @ret = uniqstr(@ret); + my %ct = map { $_ => commit_ct($self, $_) } @ret; + @ret = sort { $ct{$a} <=> $ct{$b} } @ret ; + } @ret; } -sub paths2roots { +sub paths2roots { # for diagnostics my ($self, $paths) = @_; my %ret; if ($paths) { for my $p (keys %$paths) { @{$ret{$p}} = root_oids($self, $p) } } else { my $tmp = roots2paths($self); + my %ct; for my $root_oidhex (keys %$tmp) { my $paths = delete $tmp->{$root_oidhex}; + $ct{$root_oidhex} = commit_ct($self, $root_oidhex); push @{$ret{$_}}, $root_oidhex for @$paths; } - @$_ = sort(@$_) for values %ret; + for my $oids (values %ret) { + # sort OIDs by commit time ascending + @$oids = sort { $ct{$a} <=> $ct{$b} } @$oids; + } } \%ret; } +sub base2roots { # for diagnostics + my ($self, $paths) = @_; + my $tmp = paths2roots($self, $paths); + my $ret = {}; + while (my ($git_dir, $roots) = each %$tmp) { + my $bn = substr($git_dir, rindex($git_dir, '/') + 1); + ++$ret->{$bn}->{$_} for @$roots; + } + $ret; +} + sub load_ct { # retry_reopen cb my ($self, $git_dir) = @_; my @ids = docids_of_git_dir $self, $git_dir or return; @@ -252,6 +295,7 @@ sub load_ct { # retry_reopen cb } } +# this is for git repos, not individual commits sub load_commit_times { # each_cindex callback my ($self, $todo) = @_; # todo = [ [ time, git ], [ time, git ] ...] my (@pending, $rec, $ct); @@ -366,7 +410,7 @@ sub repos_sorted { my @recs = map { [ 0, $_ ] } @_; # PublicInbox::Git objects my @todo = @recs; $pi_cfg->each_cindex(\&load_commit_times, \@todo); - @recs = sort { $b->[0] <=> $a->[0] } @recs; # sort by commit time + @recs = sort { $b->[0] <=> $a->[0] } @recs; # sort by repo commit time } 1; diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 570ff64f..41f6b999 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -1250,10 +1250,13 @@ sub show_json { # for diagnostics (unstable output) my %ret; my @todo = @$s; while (defined(my $f = shift @todo)) { - if ($f =~ /\A(?:roots2paths|paths2roots|join_data)\z/) { + if ($f =~ /,/) { + push @todo, split(/,/, $f); + } elsif ($f =~ /\A(?:roots2paths|paths2roots|join_data| + base2roots)\z/x) { $ret{$f} = $self->$f; } elsif ($f eq '') { # default --show (no args) - push @todo, qw(roots2paths join_data); + push @todo, qw(base2roots join_data); } else { warn "E: cannot show `$f'\n"; }