dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH] viewvcs: parallelize commit display
@ 2024-02-11 21:50 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2024-02-11 21:50 UTC (permalink / raw)
  To: spew

Similar to commit cbe2548c91859dfb923548ea85d8531b90d53dc3
(www_coderepo: use OnDestroy to render summary view,
2023-04-09), we can rely on OnDestroy and Qspawn to run
dependent commands and ensure tasks get run properly.  This also
allows us to avoid needless `git patch-id' invocations for
patches which are too big to show.

This saves nearly 2s from the linux.git root commit, bringing
/pub/scm/linux/kernel/git/torvalds/linux.git/1da177e4c3f41524e886b7f1b8a0c1fc7321cac2/s/
down to 5.8s on my system.  This is still ridiculously slow;
git itself doesn't have a great way to get the size of a patch
without generating it first.

With cindex, it should be possible to index the size of a patch
with default options...
---
 lib/PublicInbox/ViewVCS.pm | 104 +++++++++++++++++++++++--------------
 1 file changed, 66 insertions(+), 38 deletions(-)

diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index c8352378..e4af98ea 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -28,7 +28,8 @@ use PublicInbox::Eml;
 use Text::Wrap qw(wrap);
 use PublicInbox::Hval qw(ascii_html to_filename prurl utf8_maybe);
 use POSIX qw(strftime);
-use autodie qw(open);
+use autodie qw(open seek);
+use Fcntl qw(SEEK_SET);
 my $hl = eval {
 	require PublicInbox::HlMod;
 	PublicInbox::HlMod->new;
@@ -59,7 +60,7 @@ sub html_page ($$;@) {
 sub dbg_log ($) {
 	my ($ctx) = @_;
 	my $log = delete $ctx->{lh} // die 'BUG: already captured debug log';
-	if (!seek($log, 0, 0)) {
+	if (!CORE::seek($log, 0, SEEK_SET)) {
 		warn "seek(log): $!";
 		return '<pre>debug log seek error</pre>';
 	}
@@ -119,17 +120,18 @@ sub show_other_result ($$) { # future-proofing
 }
 
 sub cmt_title { # git->cat_async callback
-	my ($bref, $oid, $type, $size, $ctx) = @_;
+	my ($bref, $oid, $type, $size, $ctx_cb) = @_;
 	utf8_maybe($$bref);
 	my $title = $$bref =~ /\r?\n\r?\n([^\r\n]+)\r?\n?/ ? $1 : '';
-	push(@{$ctx->{-cmt_pt}} , ascii_html($title)) == @{$ctx->{-cmt_P}} and
-		cmt_finalize($ctx);
+	# $ctx_cb is [ $ctx, $cmt_fin ]
+	push @{$ctx_cb->[0]->{-cmt_pt}}, ascii_html($title);
 }
 
 sub do_cat_async {
-	my ($ctx, $cb, @req) = @_;
+	my ($arg, $cb, @req) = @_;
 	# favor git(1) over Gcf2 (libgit2) for SHA-256 support
-	$ctx->{git}->cat_async($_, $cb, $ctx) for @req;
+	my $ctx = ref $arg eq 'ARRAY' ? $arg->[0] : $arg;
+	$ctx->{git}->cat_async($_, $cb, $arg) for @req;
 	if ($ctx->{env}->{'pi-httpd.async'}) {
 		$ctx->{git}->watch_async;
 	} else { # synchronous, generic PSGI
@@ -147,24 +149,37 @@ sub do_check_async {
 	}
 }
 
-sub show_commit_start { # ->psgi_qx callback
-	my ($bref, $ctx) = @_;
-	if (my $qsp_err = delete $ctx->{-qsp_err}) {
-		return html_page($ctx, 500, dbg_log($ctx) .
-				"git show/patch-id error:$qsp_err");
-	}
-	my $patchid = (split(/ /, $$bref))[0]; # ignore commit
-	$ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid;
-	open my $fh, '<', "$ctx->{-tmp}/h";
-	chop(my $buf = do { local $/ = "\0"; <$fh> });
+sub cmt_hdr_prep { # psgi_qx cb
+	my ($fh, $ctx, $cmt_fin) = @_;
+	return if $ctx->{-qsp_err_h}; # let cmt_fin handle it
+	seek $fh, 0, SEEK_SET;
+	my $buf = do { local $/ = "\0"; <$fh> } // die "readline: $!";
+	chop($buf) eq "\0" or die 'no NUL in git show -z output';
 	utf8_maybe($buf); # non-UTF-8 commits exist
 	chomp $buf;
-	my ($P, $p);
-	($P, $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9);
-	return cmt_finalize($ctx) if !$P;
-	@{$ctx->{-cmt_P}} = split(/ /, $P);
-	@{$ctx->{-cmt_p}} = split(/ /, $p); # abbreviated
-	do_cat_async($ctx, \&cmt_title, @{$ctx->{-cmt_P}});
+	(my $P, my $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9);
+	return unless $P;
+	@{$ctx->{-cmt_P}} = split / /, $P;
+	@{$ctx->{-cmt_p}} = split / /, $p; # abbreviated
+	do_cat_async([$ctx, $cmt_fin], \&cmt_title, @{$ctx->{-cmt_P}});
+}
+
+sub read_patchid { # psgi_qx cb
+	my ($bref, $ctx, $cmt_fin) = @_;
+	my ($patchid) = split(/ /, $$bref); # ignore commit
+	$ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid;
+}
+
+sub cmt_patch_prep { # psgi_qx cb
+	my ($fh, $ctx, $cmt_fin) = @_;
+	return if $ctx->{-qsp_err_p}; # let cmt_fin handle error
+	return if -s $fh > $MAX_SIZE; # too big to show, too big to patch-id
+	seek $fh, 0, SEEK_SET;
+	my $qsp = PublicInbox::Qspawn->new(
+				$ctx->{git}->cmd(qw(patch-id --stable)),
+				undef, { 0 => $fh });
+	$qsp->{qsp_err} = \$ctx->{-qsp_err_p};
+	$qsp->psgi_qx($ctx->{env}, undef, \&read_patchid, $ctx, $cmt_fin);
 }
 
 sub ibx_url_for {
@@ -194,8 +209,14 @@ sub ibx_url_for {
 	wantarray ? (@ret) : $ret[0];
 }
 
-sub cmt_finalize {
+sub cmt_fin { # OnDestroy cb
 	my ($ctx) = @_;
+	my ($eh, $ep) = delete @$ctx{qw(-qsp_err_h -qsp_err_p)};
+	if ($eh || $ep) {
+		my $e = join(' - ', grep defined, $eh, $ep);
+		return html_page($ctx, 500, dbg_log($ctx) .
+				"git show/patch-id error:$e");
+	}
 	$ctx->{-linkify} //= PublicInbox::Linkify->new;
 	my $upfx = $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
 	my ($H, $T, $s, $f, $au, $co, $bdy) = @{delete $ctx->{cmt_info}};
@@ -243,11 +264,12 @@ committer $co
 <b>$s</b>
 EOM
 	print $zfh "\n", $ctx->{-linkify}->to_html($bdy) if length($bdy);
-	$bdy = '';
-	open my $fh, '<', "$ctx->{-tmp}/p";
+	undef $bdy; # free memory
+	my $fh = delete $ctx->{patch_fh};
 	if (-s $fh > $MAX_SIZE) {
-		print $zfh "---\n patch is too large to show\n";
+		print $zfh "\n---\n patch is too large to show\n";
 	} else { # prepare flush_diff:
+		seek $fh, 0, SEEK_SET;
 		PublicInbox::IO::read_all $fh, -s _, \$x;
 		utf8_maybe($x);
 		$ctx->{-apfx} = $ctx->{-spfx} = $upfx;
@@ -350,18 +372,24 @@ sub show_commit ($$) {
 	# patch-id needs two passes, and we use the initial show to ensure
 	# a patch embedded inside the commit message body doesn't get fed
 	# to patch-id:
-	my $cmd = [ '/bin/sh', '-c',
-		"git show --encoding=UTF-8 '$SHOW_FMT'".
-		" -z --no-notes --no-patch $oid >h && ".
-		'git show --encoding=UTF-8 --pretty=format:%n -M'.
-		" --stat -p $oid >p && ".
-		"git patch-id --stable <p" ];
-	my $e = { GIT_DIR => $git->{git_dir} };
-	my $qsp = PublicInbox::Qspawn->new($cmd, $e, { -C => "$ctx->{-tmp}" });
-	$qsp->{qsp_err} = \($ctx->{-qsp_err} = '');
-	$ctx->{env}->{'qspawn.wcb'} = $ctx->{-wcb};
+	my $genv = { GIT_DIR => $git->{git_dir} };
+	my $dir = "$ctx->{-tmp}";
+	my ($opt_h, $opt_p) = ({ -C => $dir }, { -C => $dir });
+	open $opt_h->{1}, '+>', "$dir/h";
+	open $opt_p->{1}, '+>', "$dir/p";
+	$ctx->{patch_fh} = $opt_p->{1};
+	my $qsp_h = PublicInbox::Qspawn->new($git->cmd('show', $SHOW_FMT,
+		qw(--encoding=UTF-8 -z --no-notes --no-patch), $oid),
+		$genv, $opt_h);
+	my $qsp_p = PublicInbox::Qspawn->new($git->cmd(qw(show
+		--encoding=UTF-8 --pretty=format:%n -M --stat -p), $oid),
+		$genv, $opt_p);
+	$qsp_h->{qsp_err} = \($ctx->{-qsp_err_h} = '');
+	$qsp_p->{qsp_err} = \($ctx->{-qsp_err_p} = '');
+	my $cmt_fin = PublicInbox::OnDestroy->new($$, \&cmt_fin, $ctx);
 	$ctx->{git} = $git;
-	$qsp->psgi_qx($ctx->{env}, undef, \&show_commit_start, $ctx);
+	$qsp_h->psgi_qx($ctx->{env}, undef, \&cmt_hdr_prep, $ctx, $cmt_fin);
+	$qsp_p->psgi_qx($ctx->{env}, undef, \&cmt_patch_prep, $ctx, $cmt_fin);
 }
 
 sub show_other ($$) { # just in case...

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2024-02-11 21:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-11 21:50 [PATCH] viewvcs: parallelize commit display Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).