From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id D4D191F47D for ; Sun, 8 Oct 2023 11:45:34 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1696765534; bh=DAp4rQwidPGNOEBwoGOpbEoLKbetKh0z8q3wQUXPWaQ=; h=From:To:Subject:Date:In-Reply-To:References:From; b=nQg/lt9DObPYFhOJg1eFUnJ0lZnqaNLOOom1MqeDneLzf2i03xTghOVzNiTul1a5A /3ZMjzIyNBGqOM0Bjfy+L+Cqiz3oMarVPKgpW1cWnDFSYaq7nmqf1xqjdU6dsx5qfB LjzjbqOYXvhGMww16KE8n+GFNBbKxc1dWs9wGvIE= From: Eric Wong To: spew@80x24.org Subject: [PATCH 3/4] lei_xsearch: improve curl progress reporting Date: Sun, 8 Oct 2023 11:45:33 +0000 Message-ID: <20231008114534.274616-3-e@80x24.org> In-Reply-To: <20231008114534.274616-1-e@80x24.org> References: <20231008114534.274616-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Instead of having tail(1) follow a file when we're in verbose mode, unconditionally pipe stderr to a Perl 2-liner which tees its output to a regular file with line buffering. POSIX tee(1) isn't suitable for this task since it's required to be completely unbuffered while we want line-buffering when running parallel processes. Fortunately, Perl makes this easy. This also means we no longer leave curl.err-XXXX files around on premature shutdown if we're hit by a SIGKILL or similar and can't exit normally. We do need to stop and respawn the Perl process if we hit a curl error, though, since we need to be certain the output is flushed. 
--- lib/PublicInbox/LeiXSearch.pm | 48 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index ef66aaea..bfa91200 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -12,16 +12,15 @@ use PublicInbox::DS qw(now); use File::Temp 0.19 (); # 0.19 for ->newdir use File::Spec (); use PublicInbox::Search qw(xap_terms); -use PublicInbox::Spawn qw(popen_rd spawn which); +use PublicInbox::Spawn qw(popen_rd popen_wr which); use PublicInbox::MID qw(mids); use PublicInbox::Smsg; -use PublicInbox::AutoReap; use PublicInbox::Eml; use PublicInbox::LEI; use Fcntl qw(SEEK_SET F_SETFL O_APPEND O_RDWR); use PublicInbox::ContentHash qw(git_sha); use POSIX qw(strftime); -use autodie qw(seek truncate); +use autodie qw(open read seek truncate); sub new { my ($class) = @_; @@ -330,19 +329,19 @@ sub query_remote_mboxrd { $qstr =~ s/[ \n\t]+/ /sg; # make URLs less ugly my @qform = (x => 'm'); push(@qform, t => 1) if $opt->{threads}; - my $verbose = $opt->{verbose}; - my $reap_tail; - my $cerr = File::Temp->new(TEMPLATE => 'curl.err-XXXX', TMPDIR => 1); + open my $cerr, '+>', undef; fcntl($cerr, F_SETFL, O_APPEND|O_RDWR) or warn "set O_APPEND: $!"; - my $rdr = { 2 => $cerr }; - if ($verbose) { - # spawn a process to force line-buffering, otherwise curl - # will write 1 character at-a-time and parallel outputs - # mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss - my $o = { 1 => $lei->{2}, 2 => $lei->{2} }; - my $pid = spawn(['tail', '-f', $cerr->filename], undef, $o); - $reap_tail = PublicInbox::AutoReap->new($pid); - } + + # spawn a line-buffered tee(1) script, otherwise curl + # will write 1 character at-a-time and parallel outputs + # mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss + # (n.b. 
POSIX tee(1) cannot do any buffering) + my $topt = { 1 => $cerr, 2 => $lei->{2} }; + my $rdr; + my $lbf_tee = [ $^X, qw(-w -p -e), <<'' ]; +BEGIN { $| = 1; use IO::Handle; STDERR->autoflush(1); } +print STDERR $_; + my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return; push @$curl, '-s', '-d', ''; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); @@ -354,6 +353,7 @@ sub query_remote_mboxrd { $uri->query_form(@qform, q => $q); my $cmd = $curl->for_uri($lei, $uri); $lei->qerr("# $cmd"); + $rdr->{2} //= popen_wr($lbf_tee, undef, $topt); my $cfh = popen_rd($cmd, undef, $rdr); my $fh = IO::Uncompress::Gunzip->new($cfh, MultiStream => 1); PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self, @@ -361,21 +361,19 @@ sub query_remote_mboxrd { $lei->sto_done_request if delete($self->{-sto_imported}); my $nr = delete $lei->{-nr_remote_eml} // 0; close $cfh; - if ($? == 0) { # don't update if no results, maybe MTA is down + my $code = $?; + if (!$code) { # don't update if no results, maybe MTA is down $lei->{lss}->cfg_set($key, $start) if $key && $nr; mset_progress($lei, $lei->{-current_url}, $nr, $nr); next; } - my $err; - eval { - seek($cerr, 0, SEEK_SET); - read($cerr, $err, -s $cerr); - truncate($cerr, 0); - }; - warn "E: $@ ($cmd stderr)" if $@; - next if (($? >> 8) == 22 && $err =~ /\b404\b/); + close(delete($rdr->{2})); + seek($cerr, 0, SEEK_SET); + read($cerr, my $err, -s $cerr); + truncate($cerr, 0); + next if (($code >> 8) == 22 && $err =~ /\b404\b/); $uri->query_form(q => $qstr); - $lei->child_error($?, "E: <$uri> $err"); + $lei->child_error($code, "E: <$uri> `$cmd` failed"); } undef $each_smsg; $lei->{ovv}->ovv_atexit_child($lei);