dumping ground for random patches and texts
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH 02/18] spawn: support synchronous run_qx
Date: Thu, 19 Oct 2023 12:40:02 +0000	[thread overview]
Message-ID: <20231019124018.2109632-2-e@80x24.org> (raw)
In-Reply-To: <20231019124018.2109632-1-e@80x24.org>

This is similar to `backtick` but supports all our existing spawn
functionality (chdir, env, rlimit, redirects, etc.).  It also
supports SCALAR ref redirects like run_script in our test suite
for std{in,out,err}.

We can probably use :utf8 by default for these redirects, even.
---
 lib/PublicInbox/Git.pm       |  6 ++++
 lib/PublicInbox/SearchIdx.pm | 19 ++++------
 lib/PublicInbox/Spawn.pm     | 69 ++++++++++++++++++++++++++----------
 t/spawn.t                    | 13 ++++++-
 4 files changed, 76 insertions(+), 31 deletions(-)

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index a460d155..476dcf30 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -69,6 +69,7 @@ sub check_git_exe () {
 		$GIT_VER = eval("v$1") // die "BUG: bad vstring: $1 ($v)";
 		$EXE_ST = $st;
 	}
+	$GIT_EXE;
 }
 
 sub git_version {
@@ -422,6 +423,11 @@ sub async_err ($$$$$) {
 	$async_warn ? carp($msg) : $self->fail($msg);
 }
 
+sub cmd {
+	my $self = shift;
+	[ $GIT_EXE // check_git_exe(), "--git-dir=$self->{git_dir}", @_ ]
+}
+
 # $git->popen(qw(show f00)); # or
 # $git->popen(qw(show f00), { GIT_CONFIG => ... }, { 2 => ... });
 sub popen {
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 8a571cfb..3c64c715 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -22,7 +22,7 @@ use POSIX qw(strftime);
 use Fcntl qw(SEEK_SET);
 use Time::Local qw(timegm);
 use PublicInbox::OverIdx;
-use PublicInbox::Spawn qw(run_wait);
+use PublicInbox::Spawn qw(run_wait run_qx);
 use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
 use PublicInbox::Address;
@@ -351,23 +351,18 @@ sub index_diff ($$$) {
 }
 
 sub patch_id {
-	my ($self) = @_; # $_[1] is the diff (may be huge)
-	open(my $fh, '+>:utf8', undef) or die "open: $!";
-	open(my $eh, '+>', undef) or die "open: $!";
-	$fh->autoflush(1);
-	print $fh $_[1] or die "print: $!";
-	sysseek($fh, 0, SEEK_SET) or die "sysseek: $!";
-	my $id = ($self->{ibx} // $self->{eidx} // $self)->git->qx(
-			[qw(patch-id --stable)], {}, { 0 => $fh, 2 => $eh });
-	seek($eh, 0, SEEK_SET) or die "seek: $!";
-	while (<$eh>) { warn $_ }
+	my ($self, $sref) = @_;
+	my $git = ($self->{ibx} // $self->{eidx} // $self)->git;
+	my $opt = { 0 => $sref, 2 => \(my $err) };
+	my $id = run_qx($git->cmd(qw(patch-id --stable)), undef, $opt);
+	warn $err if $err;
 	$id =~ /\A([a-f0-9]{40,})/ ? $1 : undef;
 }
 
 sub index_body_text {
 	my ($self, $doc, $sref) = @_;
 	if ($$sref =~ /^(?:diff|---|\+\+\+) /ms) {
-		my $id = patch_id($self, $$sref);
+		my $id = patch_id($self, $sref);
 		$doc->add_term('XDFID'.$id) if defined($id);
 	}
 
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index 106f5e01..1fa7a41f 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -22,8 +22,9 @@ use Fcntl qw(SEEK_SET);
 use IO::Handle ();
 use Carp qw(croak);
 use PublicInbox::ProcessIO;
-our @EXPORT_OK = qw(which spawn popen_rd popen_wr run_die run_wait);
+our @EXPORT_OK = qw(which spawn popen_rd popen_wr run_die run_wait run_qx);
 our @RLIMITS = qw(RLIMIT_CPU RLIMIT_CORE RLIMIT_DATA);
+use autodie qw(open pipe read seek sysseek truncate);
 
 BEGIN {
 	my $all_libc = <<'ALL_LIBC'; # all *nix systems we support
@@ -290,7 +291,6 @@ ALL_LIBC
 	undef $all_libc unless -d $inline_dir;
 	if (defined $all_libc) {
 		local $ENV{PERL_INLINE_DIRECTORY} = $inline_dir;
-		use autodie;
 		# CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
 		my $lk = PublicInbox::Lock->new($inline_dir.
 						'/.public-inbox.lock');
@@ -301,7 +301,7 @@ ALL_LIBC
 		open STDERR, '>&', $fh;
 		STDERR->autoflush(1);
 		STDOUT->autoflush(1);
-		CORE::eval 'use Inline C => $all_libc, BUILD_NOISY => 1';
+		eval 'use Inline C => $all_libc, BUILD_NOISY => 1';
 		my $err = $@;
 		open(STDERR, '>&', $olderr);
 		open(STDOUT, '>&', $oldout);
@@ -332,26 +332,34 @@ sub which ($) {
 }
 
 sub spawn ($;$$) {
-	my ($cmd, $env, $opts) = @_;
+	my ($cmd, $env, $opt) = @_;
 	my $f = which($cmd->[0]) // die "$cmd->[0]: command not found\n";
-	my @env;
+	my (@env, @rdr);
 	my %env = (%ENV, $env ? %$env : ());
 	while (my ($k, $v) = each %env) {
 		push @env, "$k=$v" if defined($v);
 	}
-	my $redir = [];
 	for my $child_fd (0..2) {
-		my $parent_fd = $opts->{$child_fd};
-		if (defined($parent_fd) && $parent_fd !~ /\A[0-9]+\z/) {
-			my $fd = fileno($parent_fd) //
-					die "$parent_fd not an IO GLOB? $!";
-			$parent_fd = $fd;
+		my $pfd = $opt->{$child_fd};
+		if ('SCALAR' eq ref($pfd)) {
+			open my $fh, '+>:utf8', undef;
+			$opt->{"fh.$child_fd"} = $fh;
+			if ($child_fd == 0) {
+				print $fh $$pfd;
+				$fh->flush or die "flush: $!";
+				sysseek($fh, 0, SEEK_SET);
+			}
+			$pfd = fileno($fh);
+		} elsif (defined($pfd) && $pfd !~ /\A[0-9]+\z/) {
+			my $fd = fileno($pfd) //
+					die "$pfd not an IO GLOB? $!";
+			$pfd = $fd;
 		}
-		$redir->[$child_fd] = $parent_fd // $child_fd;
+		$rdr[$child_fd] = $pfd // $child_fd;
 	}
 	my $rlim = [];
 	foreach my $l (@RLIMITS) {
-		my $v = $opts->{$l} // next;
+		my $v = $opt->{$l} // next;
 		my $r = eval "require BSD::Resource; BSD::Resource::$l();";
 		unless (defined $r) {
 			warn "$l undefined by BSD::Resource: $@\n";
@@ -359,31 +367,41 @@ sub spawn ($;$$) {
 		}
 		push @$rlim, $r, @$v;
 	}
-	my $cd = $opts->{'-C'} // ''; # undef => NULL mapping doesn't work?
-	my $pgid = $opts->{pgid} // -1;
-	my $pid = pi_fork_exec($redir, $f, $cmd, \@env, $rlim, $cd, $pgid);
+	my $cd = $opt->{'-C'} // ''; # undef => NULL mapping doesn't work?
+	my $pgid = $opt->{pgid} // -1;
+	my $pid = pi_fork_exec(\@rdr, $f, $cmd, \@env, $rlim, $cd, $pgid);
 	die "fork_exec @$cmd failed: $!\n" unless $pid > 0;
 	$pid;
 }
 
 sub popen_rd {
 	my ($cmd, $env, $opt, @cb_arg) = @_;
-	pipe(my $r, local $opt->{1}) or die "pipe: $!\n";
+	pipe(my $r, local $opt->{1});
 	my $pid = spawn($cmd, $env, $opt);
 	PublicInbox::ProcessIO->maybe_new($pid, $r, @cb_arg);
 }
 
 sub popen_wr {
 	my ($cmd, $env, $opt, @cb_arg) = @_;
-	pipe(local $opt->{0}, my $w) or die "pipe: $!\n";
+	pipe(local $opt->{0}, my $w);
 	$w->autoflush(1);
 	my $pid = spawn($cmd, $env, $opt);
 	PublicInbox::ProcessIO->maybe_new($pid, $w, @cb_arg)
 }
 
+sub read_out_err ($) {
+	my ($opt) = @_;
+	for my $fd (1, 2) { # read stdout/stderr
+		my $fh = delete($opt->{"fh.$fd"}) // next;
+		seek($fh, 0, SEEK_SET);
+		read($fh, ${$opt->{$fd}}, -s $fh, length(${$opt->{$fd}} // ''));
+	}
+}
+
 sub run_wait ($;$$) {
 	my ($cmd, $env, $opt) = @_;
 	waitpid(spawn($cmd, $env, $opt), 0);
+	read_out_err($opt);
 	$?
 }
 
@@ -392,4 +410,19 @@ sub run_die ($;$$) {
 	run_wait($cmd, $env, $rdr) and croak "E: @$cmd failed: \$?=$?";
 }
 
+sub run_qx {
+	my ($cmd, $env, $opt) = @_;
+	my $fh = popen_rd($cmd, $env, $opt);
+	my @ret;
+	if (wantarray) {
+		@ret = <$fh>;
+	} else {
+		local $/;
+		$ret[0] = <$fh>;
+	}
+	close $fh; # caller should check $?
+	read_out_err($opt);
+	wantarray ? @ret : $ret[0];
+}
+
 1;
diff --git a/t/spawn.t b/t/spawn.t
index 1af66bda..4b3baae4 100644
--- a/t/spawn.t
+++ b/t/spawn.t
@@ -3,7 +3,7 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use v5.12;
 use Test::More;
-use PublicInbox::Spawn qw(which spawn popen_rd);
+use PublicInbox::Spawn qw(which spawn popen_rd run_qx);
 require PublicInbox::Sigfd;
 require PublicInbox::DS;
 
@@ -19,6 +19,17 @@ require PublicInbox::DS;
 	is($?, 0, 'true exited successfully');
 }
 
+{
+	my $opt = { 0 => \'in', 2 => \(my $e) };
+	my $out = run_qx(['sh', '-c', 'echo e >&2; cat'], undef, $opt);
+	is($e, "e\n", 'captured stderr');
+	is($out, 'in', 'stdin read and stdout captured');
+	$opt->{0} = \"IN\n3\nLINES";
+	my @out = run_qx(['sh', '-c', 'echo E >&2; cat'], undef, $opt);
+	is($e, "e\nE\n", 'captured stderr appended to string');
+	is_deeply(\@out, [ "IN\n", "3\n", 'LINES' ], 'stdout array');
+}
+
 SKIP: {
 	my $pid = spawn(['true'], undef, { pgid => 0 });
 	ok($pid, 'spawned process with new pgid');

  reply	other threads:[~2023-10-19 12:40 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-19 12:40 [PATCH 01/18] limiter: split out from qspawn Eric Wong
2023-10-19 12:40 ` Eric Wong [this message]
2023-10-19 12:40 ` [PATCH 03/18] psgi_qx: use a temporary file rather than pipe Eric Wong
2023-10-19 12:40 ` [PATCH 04/18] www_coderepo: capture uses a flattened list Eric Wong
2023-10-19 12:40 ` [PATCH 05/18] qspawn: psgi_return allows list for callback args Eric Wong
2023-10-19 12:40 ` [PATCH 06/18] qspawn: drop unused err arg for ->event_step Eric Wong
2023-10-19 12:40 ` [PATCH 07/18] httpd/async: require IO arg Eric Wong
2023-10-19 12:40 ` [PATCH 08/18] tests: move reset Eric Wong
2023-10-19 12:40 ` [PATCH 09/18] qspawn: introduce psgi_yield API Eric Wong
2023-10-19 12:40 ` [PATCH 10/18] repo_atom: switch to psgi_yield Eric Wong
2023-10-19 12:40 ` [PATCH 11/18] repo_snapshot: psgi_yield Eric Wong
2023-10-19 12:40 ` [PATCH 12/18] viewvcs: psgi_yield Eric Wong
2023-10-19 12:40 ` [PATCH 13/18] www_altid: psgi_yield Eric Wong
2023-10-19 12:40 ` [PATCH 14/18] cgit: psgi_yield Eric Wong
2023-10-19 12:40 ` [PATCH 15/18] www_coderepo: psgi_yield Eric Wong
2023-10-19 12:40 ` [PATCH 16/18] githttpbackend: fix outdated comments Eric Wong
2023-10-19 12:40 ` [PATCH 17/18] drop psgi_return and httpd/async entirely Eric Wong
2023-10-19 12:40 ` [PATCH 18/18] kill getlinebody Eric Wong
  -- strict thread matches above, loose matches on Subject: below --
2023-10-23  8:48 [PATCH 01/18] limiter: split out from qspawn Eric Wong
2023-10-23  8:48 ` [PATCH 02/18] spawn: support synchronous run_qx Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231019124018.2109632-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=spew@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).