dumping ground for random patches and texts
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH 3/4] lei_xsearch: improve curl progress reporting
Date: Sun,  8 Oct 2023 11:45:33 +0000	[thread overview]
Message-ID: <20231008114534.274616-3-e@80x24.org> (raw)
In-Reply-To: <20231008114534.274616-1-e@80x24.org>

Instead of having tail(1) follow a file when we're in verbose
mode, unconditionally pipe stderr to a Perl 2-liner which tees
its output to a regular file with line buffering.

POSIX tee(1) isn't suitable for this task since it's required
to be completely unbuffered while we want line-buffering when
running parallel processes.  Fortunately, Perl makes this easy.

This also means we no longer leave curl.err-XXXX files around
on premature shutdown if we're hit by a SIGKILL or similar and
can't exit normally.

We do need to stop and respawn the Perl process if we hit a curl
error, though, since we need to be certain the output is
flushed.
---
 lib/PublicInbox/LeiXSearch.pm | 48 +++++++++++++++++------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index ef66aaea..bfa91200 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -12,16 +12,15 @@ use PublicInbox::DS qw(now);
 use File::Temp 0.19 (); # 0.19 for ->newdir
 use File::Spec ();
 use PublicInbox::Search qw(xap_terms);
-use PublicInbox::Spawn qw(popen_rd spawn which);
+use PublicInbox::Spawn qw(popen_rd popen_wr which);
 use PublicInbox::MID qw(mids);
 use PublicInbox::Smsg;
-use PublicInbox::AutoReap;
 use PublicInbox::Eml;
 use PublicInbox::LEI;
 use Fcntl qw(SEEK_SET F_SETFL O_APPEND O_RDWR);
 use PublicInbox::ContentHash qw(git_sha);
 use POSIX qw(strftime);
-use autodie qw(seek truncate);
+use autodie qw(open read seek truncate);
 
 sub new {
 	my ($class) = @_;
@@ -330,19 +329,19 @@ sub query_remote_mboxrd {
 	$qstr =~ s/[ \n\t]+/ /sg; # make URLs less ugly
 	my @qform = (x => 'm');
 	push(@qform, t => 1) if $opt->{threads};
-	my $verbose = $opt->{verbose};
-	my $reap_tail;
-	my $cerr = File::Temp->new(TEMPLATE => 'curl.err-XXXX', TMPDIR => 1);
+	open my $cerr, '+>', undef;
 	fcntl($cerr, F_SETFL, O_APPEND|O_RDWR) or warn "set O_APPEND: $!";
-	my $rdr = { 2 => $cerr };
-	if ($verbose) {
-		# spawn a process to force line-buffering, otherwise curl
-		# will write 1 character at-a-time and parallel outputs
-		# mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss
-		my $o = { 1 => $lei->{2}, 2 => $lei->{2} };
-		my $pid = spawn(['tail', '-f', $cerr->filename], undef, $o);
-		$reap_tail = PublicInbox::AutoReap->new($pid);
-	}
+
+	# spawn a line-buffered tee(1) script, otherwise curl
+	# will write 1 character at-a-time and parallel outputs
+	# mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss
+	# (n.b. POSIX tee(1) cannot do any buffering)
+	my $topt = { 1 => $cerr, 2 => $lei->{2} };
+	my $rdr;
+	my $lbf_tee = [ $^X, qw(-w -p -e), <<'' ];
+BEGIN { $| = 1; use IO::Handle; STDERR->autoflush(1); }
+print STDERR $_;
+
 	my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
 	push @$curl, '-s', '-d', '';
 	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
@@ -354,6 +353,7 @@ sub query_remote_mboxrd {
 		$uri->query_form(@qform, q => $q);
 		my $cmd = $curl->for_uri($lei, $uri);
 		$lei->qerr("# $cmd");
+		$rdr->{2} //= popen_wr($lbf_tee, undef, $topt);
 		my $cfh = popen_rd($cmd, undef, $rdr);
 		my $fh = IO::Uncompress::Gunzip->new($cfh, MultiStream => 1);
 		PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self,
@@ -361,21 +361,19 @@ sub query_remote_mboxrd {
 		$lei->sto_done_request if delete($self->{-sto_imported});
 		my $nr = delete $lei->{-nr_remote_eml} // 0;
 		close $cfh;
-		if ($? == 0) { # don't update if no results, maybe MTA is down
+		my $code = $?;
+		if (!$code) { # don't update if no results, maybe MTA is down
 			$lei->{lss}->cfg_set($key, $start) if $key && $nr;
 			mset_progress($lei, $lei->{-current_url}, $nr, $nr);
 			next;
 		}
-		my $err;
-		eval {
-			seek($cerr, 0, SEEK_SET);
-			read($cerr, $err, -s $cerr);
-			truncate($cerr, 0);
-		};
-		warn "E: $@ ($cmd stderr)" if $@;
-		next if (($? >> 8) == 22 && $err =~ /\b404\b/);
+		close(delete($rdr->{2}));
+		seek($cerr, 0, SEEK_SET);
+		read($cerr, my $err, -s $cerr);
+		truncate($cerr, 0);
+		next if (($code >> 8) == 22 && $err =~ /\b404\b/);
 		$uri->query_form(q => $qstr);
-		$lei->child_error($?, "E: <$uri> $err");
+		$lei->child_error($code, "E: <$uri> `$cmd` failed");
 	}
 	undef $each_smsg;
 	$lei->{ovv}->ovv_atexit_child($lei);

  parent reply	other threads:[~2023-10-08 11:45 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-08 11:45 [PATCH 1/4] process_io: fix binmode and use it in lei_xsearch for curl Eric Wong
2023-10-08 11:45 ` [PATCH 2/4] lei rediff: use ProcessIO for --drq support Eric Wong
2023-10-08 11:45 ` Eric Wong [this message]
2023-10-08 11:45 ` [PATCH 4/4] lei_{input,remote}: drop AutoReap usage for curl Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231008114534.274616-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=spew@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).