From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id C31B01F405 for ; Sun, 11 Feb 2024 21:50:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1707688257; bh=45pRlUETy1QFfBR78Wai1z4nzAyHX6UDGaZdGgFrb9s=; h=From:To:Subject:Date:From; b=fk1dVT0WeGl9bO2h8mOtHDsMz0VqrYGthmrlVhGpuVNhk/L1C66eanN3IpUa/tu+j NMCLZsSu3VB+fKzVashbsM3kOe5mCaNWFo926qBghAYG7YB/GSCBy3tRSnwFRNXIYF jQzMer4KbFFUB4fhtwFGjpQ6+Tmrw1bCh1UKrsj4= From: Eric Wong To: spew@80x24.org Subject: [PATCH] viewvcs: parallelize commit display Date: Sun, 11 Feb 2024 21:50:57 +0000 Message-ID: <20240211215057.357306-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Similar to commit cbe2548c91859dfb923548ea85d8531b90d53dc3 (www_coderepo: use OnDestroy to render summary view, 2023-04-09), we can rely on OnDestroy and Qspawn to run dependent commands and ensure tasks get run properly. This also allows us to avoid needless `git patch-id' invocations for patches which are too big to show. This saves nearly 2s from the linux.git root commit, bringing /pub/scm/linux/kernel/git/torvalds/linux.git/1da177e4c3f41524e886b7f1b8a0c1fc7321cac2/s/ down to 5.8s on my system. This is still ridiculously slow, git itself doesn't have a great way to get the size of patch without generating it, first. With cindex, it should be possible to index the size of a patch with default options... --- lib/PublicInbox/ViewVCS.pm | 104 +++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 38 deletions(-) diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index c8352378..e4af98ea 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -28,7 +28,8 @@ use PublicInbox::Eml; use Text::Wrap qw(wrap); use PublicInbox::Hval qw(ascii_html to_filename prurl utf8_maybe); use POSIX qw(strftime); -use autodie qw(open); +use autodie qw(open seek); +use Fcntl qw(SEEK_SET); my $hl = eval { require PublicInbox::HlMod; PublicInbox::HlMod->new; @@ -59,7 +60,7 @@ sub html_page ($$;@) { sub dbg_log ($) { my ($ctx) = @_; my $log = delete $ctx->{lh} // die 'BUG: already captured debug log'; - if (!seek($log, 0, 0)) { + if (!CORE::seek($log, 0, SEEK_SET)) { warn "seek(log): $!"; return '
debug log seek error
'; } @@ -119,17 +120,18 @@ sub show_other_result ($$) { # future-proofing } sub cmt_title { # git->cat_async callback - my ($bref, $oid, $type, $size, $ctx) = @_; + my ($bref, $oid, $type, $size, $ctx_cb) = @_; utf8_maybe($$bref); my $title = $$bref =~ /\r?\n\r?\n([^\r\n]+)\r?\n?/ ? $1 : ''; - push(@{$ctx->{-cmt_pt}} , ascii_html($title)) == @{$ctx->{-cmt_P}} and - cmt_finalize($ctx); + # $ctx_cb is [ $ctx, $cmt_fin ] + push @{$ctx_cb->[0]->{-cmt_pt}}, ascii_html($title); } sub do_cat_async { - my ($ctx, $cb, @req) = @_; + my ($arg, $cb, @req) = @_; # favor git(1) over Gcf2 (libgit2) for SHA-256 support - $ctx->{git}->cat_async($_, $cb, $ctx) for @req; + my $ctx = ref $arg eq 'ARRAY' ? $arg->[0] : $arg; + $ctx->{git}->cat_async($_, $cb, $arg) for @req; if ($ctx->{env}->{'pi-httpd.async'}) { $ctx->{git}->watch_async; } else { # synchronous, generic PSGI @@ -147,24 +149,37 @@ sub do_check_async { } } -sub show_commit_start { # ->psgi_qx callback - my ($bref, $ctx) = @_; - if (my $qsp_err = delete $ctx->{-qsp_err}) { - return html_page($ctx, 500, dbg_log($ctx) . - "git show/patch-id error:$qsp_err"); - } - my $patchid = (split(/ /, $$bref))[0]; # ignore commit - $ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid; - open my $fh, '<', "$ctx->{-tmp}/h"; - chop(my $buf = do { local $/ = "\0"; <$fh> }); +sub cmt_hdr_prep { # psgi_qx cb + my ($fh, $ctx, $cmt_fin) = @_; + return if $ctx->{-qsp_err_h}; # let cmt_fin handle it + seek $fh, 0, SEEK_SET; + my $buf = do { local $/ = "\0"; <$fh> } // die "readline: $!"; + chop($buf) eq "\0" or die 'no NUL in git show -z output'; utf8_maybe($buf); # non-UTF-8 commits exist chomp $buf; - my ($P, $p); - ($P, $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9); - return cmt_finalize($ctx) if !$P; - @{$ctx->{-cmt_P}} = split(/ /, $P); - @{$ctx->{-cmt_p}} = split(/ /, $p); # abbreviated - do_cat_async($ctx, \&cmt_title, @{$ctx->{-cmt_P}}); + (my $P, my $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9); + return unless $P; + @{$ctx->{-cmt_P}} = split / /, $P; + @{$ctx->{-cmt_p}} = split / /, $p; # abbreviated + do_cat_async([$ctx, $cmt_fin], \&cmt_title, @{$ctx->{-cmt_P}}); +} + +sub read_patchid { # psgi_qx cb + my ($bref, $ctx, $cmt_fin) = @_; + my ($patchid) = split(/ /, $$bref); # ignore commit + $ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid; +} + +sub cmt_patch_prep { # psgi_qx cb + my ($fh, $ctx, $cmt_fin) = @_; + return if $ctx->{-qsp_err_p}; # let cmt_fin handle error + return if -s $fh > $MAX_SIZE; # too big to show, too big to patch-id + seek $fh, 0, SEEK_SET; + my $qsp = PublicInbox::Qspawn->new( + $ctx->{git}->cmd(qw(patch-id --stable)), + undef, { 0 => $fh }); + $qsp->{qsp_err} = \$ctx->{-qsp_err_p}; + $qsp->psgi_qx($ctx->{env}, undef, \&read_patchid, $ctx, $cmt_fin); } sub ibx_url_for { @@ -194,8 +209,14 @@ sub ibx_url_for { wantarray ? (@ret) : $ret[0]; } -sub cmt_finalize { +sub cmt_fin { # OnDestroy cb my ($ctx) = @_; + my ($eh, $ep) = delete @$ctx{qw(-qsp_err_h -qsp_err_p)}; + if ($eh || $ep) { + my $e = join(' - ', grep defined, $eh, $ep); + return html_page($ctx, 500, dbg_log($ctx) . + "git show/patch-id error:$e"); + } $ctx->{-linkify} //= PublicInbox::Linkify->new; my $upfx = $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/" my ($H, $T, $s, $f, $au, $co, $bdy) = @{delete $ctx->{cmt_info}}; @@ -243,11 +264,12 @@ committer $co $s EOM print $zfh "\n", $ctx->{-linkify}->to_html($bdy) if length($bdy); - $bdy = ''; - open my $fh, '<', "$ctx->{-tmp}/p"; + undef $bdy; # free memory + my $fh = delete $ctx->{patch_fh}; if (-s $fh > $MAX_SIZE) { - print $zfh "---\n patch is too large to show\n"; + print $zfh "\n---\n patch is too large to show\n"; } else { # prepare flush_diff: + seek $fh, 0, SEEK_SET; PublicInbox::IO::read_all $fh, -s _, \$x; utf8_maybe($x); $ctx->{-apfx} = $ctx->{-spfx} = $upfx; @@ -350,18 +372,24 @@ sub show_commit ($$) { # patch-id needs two passes, and we use the initial show to ensure # a patch embedded inside the commit message body doesn't get fed # to patch-id: - my $cmd = [ '/bin/sh', '-c', - "git show --encoding=UTF-8 '$SHOW_FMT'". - " -z --no-notes --no-patch $oid >h && ". - 'git show --encoding=UTF-8 --pretty=format:%n -M'. - " --stat -p $oid >p && ". - "git patch-id --stable $git->{git_dir} }; - my $qsp = PublicInbox::Qspawn->new($cmd, $e, { -C => "$ctx->{-tmp}" }); - $qsp->{qsp_err} = \($ctx->{-qsp_err} = ''); - $ctx->{env}->{'qspawn.wcb'} = $ctx->{-wcb}; + my $genv = { GIT_DIR => $git->{git_dir} }; + my $dir = "$ctx->{-tmp}"; + my ($opt_h, $opt_p) = ({ -C => $dir }, { -C => $dir }); + open $opt_h->{1}, '+>', "$dir/h"; + open $opt_p->{1}, '+>', "$dir/p"; + $ctx->{patch_fh} = $opt_p->{1}; + my $qsp_h = PublicInbox::Qspawn->new($git->cmd('show', $SHOW_FMT, + qw(--encoding=UTF-8 -z --no-notes --no-patch), $oid), + $genv, $opt_h); + my $qsp_p = PublicInbox::Qspawn->new($git->cmd(qw(show + --encoding=UTF-8 --pretty=format:%n -M --stat -p), $oid), + $genv, $opt_p); + $qsp_h->{qsp_err} = \($ctx->{-qsp_err_h} = ''); + $qsp_p->{qsp_err} = \($ctx->{-qsp_err_p} = ''); + my $cmt_fin = PublicInbox::OnDestroy->new($$, \&cmt_fin, $ctx); $ctx->{git} = $git; - $qsp->psgi_qx($ctx->{env}, undef, \&show_commit_start, $ctx); + $qsp_h->psgi_qx($ctx->{env}, undef, \&cmt_hdr_prep, $ctx, $cmt_fin); + $qsp_p->psgi_qx($ctx->{env}, undef, \&cmt_patch_prep, $ctx, $cmt_fin); } sub show_other ($$) { # just in case...