about summary refs log tree commit homepage
path: root/lib/PublicInbox/LeiXSearch.pm
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2023-10-11 07:20:50 +0000
committer: Eric Wong <e@80x24.org> 2023-10-11 22:10:43 +0000
commit: a7c337f88a1768fdc7d4d5a06b52d566ddc99c19 (patch)
tree: 1c67cf3467ceeeb2a9ad58ae96e33834ba493687 /lib/PublicInbox/LeiXSearch.pm
parent: 01e282ff7524256e8cc8e32a10baaa642ee76b70 (diff)
download: public-inbox-a7c337f88a1768fdc7d4d5a06b52d566ddc99c19.tar.gz
Instead of having tail(1) follow a file when we're in verbose
mode, pipe curl's stderr to a Perl 2-liner which tees its input,
with line buffering, to both our stderr and a regular file.

POSIX tee(1) isn't suitable for this task since it's required
to be completely unbuffered while we want line-buffering when
running parallel processes.  Fortunately, Perl makes this easy.

This also means we no longer leave curl.err-XXXX temporary files
around on premature shutdown if we're hit by a SIGKILL or similar
and can't exit normally.

We do need to stop and respawn the Perl process if we hit a curl
error, though, since we need to be certain the output is
flushed.
Diffstat (limited to 'lib/PublicInbox/LeiXSearch.pm')
-rw-r--r-- lib/PublicInbox/LeiXSearch.pm 34
1 files changed, 19 insertions, 15 deletions
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index fbafa324..2a4af3e7 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -12,16 +12,15 @@ use PublicInbox::DS qw(now);
 use File::Temp 0.19 (); # 0.19 for ->newdir
 use File::Spec ();
 use PublicInbox::Search qw(xap_terms);
-use PublicInbox::Spawn qw(popen_rd spawn which);
+use PublicInbox::Spawn qw(popen_rd popen_wr which);
 use PublicInbox::MID qw(mids);
 use PublicInbox::Smsg;
-use PublicInbox::AutoReap;
 use PublicInbox::Eml;
 use PublicInbox::LEI;
 use Fcntl qw(SEEK_SET F_SETFL O_APPEND O_RDWR);
 use PublicInbox::ContentHash qw(git_sha);
 use POSIX qw(strftime);
-use autodie qw(read seek truncate);
+use autodie qw(open read seek truncate);
 
 sub new {
         my ($class) = @_;
@@ -330,18 +329,20 @@ sub query_remote_mboxrd {
         $qstr =~ s/[ \n\t]+/ /sg; # make URLs less ugly
         my @qform = (x => 'm');
         push(@qform, t => 1) if $opt->{threads};
-        my $verbose = $opt->{verbose};
-        my $reap_tail;
-        my $cerr = File::Temp->new(TEMPLATE => 'curl.err-XXXX', TMPDIR => 1);
-        fcntl($cerr, F_SETFL, O_APPEND|O_RDWR) or warn "set O_APPEND: $!";
+        open my $cerr, '+>', undef;
         my $rdr = { 2 => $cerr };
-        if ($verbose) {
-                # spawn a process to force line-buffering, otherwise curl
+        my @lbf_tee;
+        if ($opt->{verbose}) {
+                # spawn a line-buffered tee(1) script, otherwise curl
                 # will write 1 character at-a-time and parallel outputs
                 # mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss
-                my $o = { 1 => $lei->{2}, 2 => $lei->{2} };
-                my $pid = spawn(['tail', '-f', $cerr->filename], undef, $o);
-                $reap_tail = PublicInbox::AutoReap->new($pid);
+                # (n.b. POSIX tee(1) cannot do any buffering)
+                my $o = { 1 => $cerr, 2 => $lei->{2} };
+                delete $rdr->{2};
+                @lbf_tee = ([ $^X, qw(-w -p -e), <<'' ], undef, $o);
+BEGIN { $| = 1; use IO::Handle; STDERR->autoflush(1); }
+print STDERR $_;
+
         }
         my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
         push @$curl, '-s', '-d', '';
@@ -354,6 +355,7 @@ sub query_remote_mboxrd {
                 $uri->query_form(@qform, q => $q);
                 my $cmd = $curl->for_uri($lei, $uri);
                 $lei->qerr("# $cmd");
+                $rdr->{2} //= popen_wr(@lbf_tee) if @lbf_tee;
                 my $cfh = popen_rd($cmd, undef, $rdr);
                 my $fh = IO::Uncompress::Gunzip->new($cfh, MultiStream => 1);
                 PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self,
@@ -361,17 +363,19 @@ sub query_remote_mboxrd {
                 $lei->sto_done_request if delete($self->{-sto_imported});
                 my $nr = delete $lei->{-nr_remote_eml} // 0;
                 close $cfh;
-                if ($? == 0) { # don't update if no results, maybe MTA is down
+                my $code = $?;
+                if (!$code) { # don't update if no results, maybe MTA is down
                         $lei->{lss}->cfg_set($key, $start) if $key && $nr;
                         mset_progress($lei, $lei->{-current_url}, $nr, $nr);
                         next;
                 }
+                close(delete($rdr->{2})) if @lbf_tee;
                 seek($cerr, 0, SEEK_SET);
                 read($cerr, my $err, -s $cerr);
                 truncate($cerr, 0);
-                next if (($? >> 8) == 22 && $err =~ /\b404\b/);
+                next if (($code >> 8) == 22 && $err =~ /\b404\b/);
                 $uri->query_form(q => $qstr);
-                $lei->child_error($?, "E: <$uri> $err");
+                $lei->child_error($code, "E: <$uri> `$cmd` failed");
         }
         undef $each_smsg;
         $lei->{ovv}->ovv_atexit_child($lei);