dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH 1/6] lei q: delay worker spawn
@ 2021-02-04  2:12 Eric Wong
  2021-02-04  2:12 ` [PATCH 2/6] ipc: localize fields assignment to prevent circular refs Eric Wong
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Eric Wong @ 2021-02-04  2:12 UTC (permalink / raw)
  To: spew

Now that --stdin support is sorted, we can delay spawning
workers until we know the query is ready-to-run.
---
 lib/PublicInbox/LeiQuery.pm   | 19 +++++--------------
 lib/PublicInbox/LeiXSearch.pm |  6 ++++++
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 4fe40400..6b1aa40c 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -75,21 +75,12 @@ sub lei_q {
 	$xj ||= $lxs->concurrency($opt); # allow: "--jobs ,$WRITER_ONLY"
 	my $nproc = $lxs->detect_nproc; # don't memoize, schedtool(1) exists
 	$xj = $nproc if $xj > $nproc;
-	PublicInbox::LeiOverview->new($self) or return;
-	$self->atfork_prepare_wq($lxs);
-	$lxs->wq_workers_start('lei_xsearch', $xj, $self->oldset);
-	delete $lxs->{-ipc_atfork_child_close};
-	if (my $l2m = $self->{l2m}) {
-		if (defined($mj) && $mj !~ /\A[1-9][0-9]*\z/) {
-			return $self->fail("`$mj' writer jobs must be >= 1");
-		}
-		$mj //= $nproc;
-		$self->atfork_prepare_wq($l2m);
-		$l2m->wq_workers_start('lei2mail', $mj, $self->oldset);
-		delete $l2m->{-ipc_atfork_child_close};
+	$lxs->{jobs} = $xj;
+	if (defined($mj) && $mj !~ /\A[1-9][0-9]*\z/) {
+		return $self->fail("`$mj' writer jobs must be >= 1");
 	}
-
-	# no forking workers after this
+	$self->{l2m}->{jobs} = ($mj // $nproc) if $self->{l2m};
+	PublicInbox::LeiOverview->new($self) or return;
 
 	my %mset_opt = map { $_ => $opt->{$_} } qw(thread limit offset);
 	$mset_opt{asc} = $opt->{'reverse'} ? 1 : 0;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 965617b5..ab66717c 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -406,7 +406,13 @@ sub do_query {
 	$lei->{ovv}->ovv_begin($lei);
 	my ($au_done, $zpipe);
 	my $l2m = $lei->{l2m};
+	$lei->atfork_prepare_wq($self);
+	$self->wq_workers_start('lei_xsearch', $self->{jobs}, $lei->oldset);
+	delete $self->{-ipc_atfork_child_close};
 	if ($l2m) {
+		$lei->atfork_prepare_wq($l2m);
+		$l2m->wq_workers_start('lei2mail', $l2m->{jobs}, $lei->oldset);
+		delete $l2m->{-ipc_atfork_child_close};
 		pipe($lei->{startq}, $au_done) or die "pipe: $!";
 		# 1031: F_SETPIPE_SZ
 		fcntl($lei->{startq}, 1031, 4096) if $^O eq 'linux';

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/6] ipc: localize fields assignment to prevent circular refs
  2021-02-04  2:12 [PATCH 1/6] lei q: delay worker spawn Eric Wong
@ 2021-02-04  2:12 ` Eric Wong
  2021-02-04  2:12 ` [PATCH 3/6] lei q: reorder internals to reduce FD passing Eric Wong
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2021-02-04  2:12 UTC (permalink / raw)
  To: spew

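Circular references are bad, OK?  Instead of permanently copying
$fields into $self in the worker, localize the assignment inside
the eval block so the entries are removed from $self again once
the worker loop exits.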
---
 lib/PublicInbox/IPC.pm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
index 3873649b..078aaa2c 100644
--- a/lib/PublicInbox/IPC.pm
+++ b/lib/PublicInbox/IPC.pm
@@ -338,7 +338,6 @@ sub _wq_worker_start ($$$) {
 		srand($seed);
 		eval { PublicInbox::DS->Reset };
 		delete @$self{qw(-wq_s1 -wq_workers -wq_ppid)};
-		@$self{keys %$fields} = values(%$fields) if $fields;
 		$SIG{$_} = 'IGNORE' for (qw(PIPE));
 		$SIG{$_} = 'DEFAULT' for (qw(TTOU TTIN TERM QUIT INT CHLD));
 		local $0 = $self->{-wq_ident};
@@ -346,6 +345,8 @@ sub _wq_worker_start ($$$) {
 		# ensure we properly exit even if warn() dies:
 		my $end = PublicInbox::OnDestroy->new($$, sub { exit(!!$@) });
 		eval {
+			$fields //= {};
+			local @$self{keys %$fields} = values(%$fields);
 			my $on_destroy = $self->ipc_atfork_child;
 			local %SIG = %SIG;
 			wq_worker_loop($self);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/6] lei q: reorder internals to reduce FD passing
  2021-02-04  2:12 [PATCH 1/6] lei q: delay worker spawn Eric Wong
  2021-02-04  2:12 ` [PATCH 2/6] ipc: localize fields assignment to prevent circular refs Eric Wong
@ 2021-02-04  2:12 ` Eric Wong
  2021-02-04  2:12 ` [PATCH 4/6] lei q: only start pager if output is to stdout Eric Wong
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2021-02-04  2:12 UTC (permalink / raw)
  To: spew

While FD passing is critical for script/lei <=> lei-daemon,
lei-daemon doesn't need to use it internally if FDs are
created in the proper order before forking.
---
 lib/PublicInbox/IPC.pm         |  3 --
 lib/PublicInbox/LEI.pm         | 99 +++++++---------------------------
 lib/PublicInbox/LeiOverview.pm | 28 +++-------
 lib/PublicInbox/LeiToMail.pm   | 28 ++++++----
 lib/PublicInbox/LeiXSearch.pm  | 94 +++++++++++++++-----------------
 5 files changed, 90 insertions(+), 162 deletions(-)

diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
index 078aaa2c..7f5a3f6f 100644
--- a/lib/PublicInbox/IPC.pm
+++ b/lib/PublicInbox/IPC.pm
@@ -464,9 +464,6 @@ sub DESTROY {
 	ipc_worker_stop($self);
 }
 
-# Sereal doesn't have dclone
-sub deep_clone { ipc_thaw(ipc_freeze($_[-1])) }
-
 sub detect_nproc () {
 	# _SC_NPROCESSORS_ONLN = 84 on both Linux glibc and musl
 	return POSIX::sysconf(84) if $^O eq 'linux';
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 49deed13..0d4b1c11 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -286,7 +286,7 @@ sub x_it ($$) {
 	# make sure client sees stdout before exit
 	$self->{1}->autoflush(1) if $self->{1};
 	dump_and_clear_log();
-	if (my $s = $self->{pkt_op} // $self->{sock}) {
+	if (my $s = $self->{pkt_op_p} // $self->{sock}) {
 		send($s, "x_it $code", MSG_EOR);
 	} elsif ($self->{oneshot}) {
 		# don't want to end up using $? from child processes
@@ -322,7 +322,8 @@ sub qerr ($;@) { $_[0]->{opt}->{quiet} or err(shift, @_) }
 sub fail ($$;$) {
 	my ($self, $buf, $exit_code) = @_;
 	err($self, $buf) if defined $buf;
-	send($self->{pkt_op}, '!', MSG_EOR) if $self->{pkt_op}; # fail_handler
+	# calls fail_handler:
+	send($self->{pkt_op_p}, '!', MSG_EOR) if $self->{pkt_op_p};
 	x_it($self, ($exit_code // 1) << 8);
 	undef;
 }
@@ -340,7 +341,7 @@ sub puts ($;@) { out(shift, map { "$_\n" } @_) }
 
 sub child_error { # passes non-fatal curl exit codes to user
 	my ($self, $child_error) = @_; # child_error is $?
-	if (my $s = $self->{pkt_op} // $self->{sock}) {
+	if (my $s = $self->{pkt_op_p} // $self->{sock}) {
 		# send to the parent lei-daemon or to lei(1) client
 		send($s, "child_error $child_error", MSG_EOR);
 	} elsif (!$PublicInbox::DS::in_loop) {
@@ -348,94 +349,34 @@ sub child_error { # passes non-fatal curl exit codes to user
 	} # else noop if client disconnected
 }
 
-sub atfork_prepare_wq {
-	my ($self, $wq) = @_;
-	my $tcafc = $wq->{-ipc_atfork_child_close} //= [ $listener // () ];
-	if (my $sock = $self->{sock}) {
-		push @$tcafc, @$self{qw(0 1 2 3)}, $sock;
-	}
-	if (my $pgr = $self->{pgr}) {
-		push @$tcafc, @$pgr[1,2];
-	}
-	if (my $old_1 = $self->{old_1}) {
-		push @$tcafc, $old_1;
-	}
-	for my $f (qw(lxs l2m)) {
-		my $ipc = $self->{$f} or next;
-		push @$tcafc, grep { defined }
-				@$ipc{qw(-wq_s1 -wq_s2 -ipc_req -ipc_res)};
-	}
-}
-
-sub io_restore ($$) {
-	my ($dst, $src) = @_;
-	for my $i (0..2) { # standard FDs
-		my $io = delete $src->{$i} or next;
-		$dst->{$i} = $io;
-	}
-	for my $i (3..9) { # named (non-standard) FDs
-		my $io = $src->{$i} or next;
-		my @st = stat($io) or die "stat $src.$i ($io): $!";
-		my $f = delete $dst->{"dev=$st[0],ino=$st[1]"} // next;
-		$dst->{$f} = $io;
-		delete $src->{$i};
-	}
-}
-
 sub note_sigpipe { # triggers sigpipe_handler
 	my ($self, $fd) = @_;
 	close(delete($self->{$fd})); # explicit close silences Perl warning
-	send($self->{pkt_op}, '|', MSG_EOR) if $self->{pkt_op};
+	send($self->{pkt_op_p}, '|', MSG_EOR) if $self->{pkt_op_p};
 	x_it($self, 13);
 }
 
-sub atfork_child_wq {
-	my ($self, $wq) = @_;
-	io_restore($self, $wq);
-	-S $self->{pkt_op} or die 'BUG: {pkt_op} expected';
-	io_restore($self->{l2m}, $wq);
+sub lei_atfork_child {
+	my ($self) = @_;
+	# we need to explicitly close things which are on stack
+	delete $self->{0};
+	for (delete @$self{qw(3 sock old_1 au_done)}) {
+		close($_) if defined($_);
+	}
+	if (my $op_c = delete $self->{pkt_op_c}) {
+		close(delete $op_c->{sock});
+	}
+	if (my $pgr = delete $self->{pgr}) {
+		close($_) for (@$pgr[1,2]);
+	}
+	close $listener if $listener;
+	undef $listener;
 	%PATH2CFG = ();
 	undef $errors_log;
 	$quit = \&CORE::exit;
 	$current_lei = $self; # for SIG{__WARN__}
 }
 
-sub io_extract ($;@) {
-	my ($obj, @fields) = @_;
-	my @io;
-	for my $f (@fields) {
-		my $io = delete $obj->{$f} or next;
-		my @st = stat($io) or die "W: stat $obj.$f ($io): $!";
-		$obj->{"dev=$st[0],ino=$st[1]"} = $f;
-		push @io, $io;
-	}
-	@io
-}
-
-# usage: ($lei, @io) = $lei->atfork_parent_wq($wq);
-sub atfork_parent_wq {
-	my ($self, $wq) = @_;
-	my $env = delete $self->{env}; # env is inherited at fork
-	my $lei = bless { %$self }, ref($self);
-	for my $f (qw(dedupe ovv)) {
-		my $tmp = delete($lei->{$f}) or next;
-		$lei->{$f} = $wq->deep_clone($tmp);
-	}
-	$self->{env} = $env;
-	delete @$lei{qw(sock 3 -lei_store cfg old_1 pgr lxs)}; # keep l2m
-	my @io = (delete(@$lei{qw(0 1 2)}),
-			io_extract($lei, qw(pkt_op startq)));
-	my $l2m = $lei->{l2m};
-	if ($l2m && $l2m != $wq) { # $wq == lxs
-		if (my $wq_s1 = $l2m->{-wq_s1}) {
-			push @io, io_extract($l2m, '-wq_s1');
-			$l2m->{-wq_s1} = $wq_s1;
-		}
-		$l2m->wq_close(1);
-	}
-	($lei, @io);
-}
-
 sub _help ($;$) {
 	my ($self, $errmsg) = @_;
 	my $cmd = $self->{cmd} // 'COMMAND';
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index e33d63a2..e6bf4f2a 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -207,7 +207,6 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 	}
 	$lei->{ovv_buf} = \(my $buf = '') if !$l2m;
 	if ($l2m && !$ibxish) { # remote https?:// mboxrd
-		delete $l2m->{-wq_s1};
 		my $g2m = $l2m->can('git_to_mail');
 		my $wcb = $l2m->write_cb($lei);
 		sub {
@@ -215,33 +214,20 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 			$wcb->(undef, $smsg, $eml);
 		};
 	} elsif ($l2m && $l2m->{-wq_s1}) {
-		my ($lei_ipc, @io) = $lei->atfork_parent_wq($l2m);
-		# $io[0] becomes a notification pipe that triggers EOF
+		# $io->[0] becomes a notification pipe that triggers EOF
 		# in this wq worker when all outstanding ->write_mail
 		# calls are complete
-		$io[0] = undef;
-		pipe($l2m->{each_smsg_done}, $io[0]) or die "pipe: $!";
-		fcntl($io[0], 1031, 4096) if $^O eq 'linux'; # F_SETPIPE_SZ
-		delete @$lei_ipc{qw(l2m opt mset_opt cmd)};
+		my $io = [];
+		pipe($l2m->{each_smsg_done}, $io->[0]) or die "pipe: $!";
+		fcntl($io->[0], 1031, 4096) if $^O eq 'linux'; # F_SETPIPE_SZ
 		my $git = $ibxish->git; # (LeiXSearch|Inbox|ExtSearch)->git
 		$self->{git} = $git;
 		my $git_dir = $git->{git_dir};
 		sub {
 			my ($smsg, $mitem) = @_;
 			$smsg->{pct} = get_pct($mitem) if $mitem;
-			$l2m->wq_do('write_mail', \@io, $git_dir, $smsg,
-					$lei_ipc);
+			$l2m->wq_do('write_mail', $io, $git_dir, $smsg);
 		}
-	} elsif ($l2m) {
-		my $wcb = $l2m->write_cb($lei);
-		my $git = $ibxish->git; # (LeiXSearch|Inbox|ExtSearch)->git
-		$self->{git} = $git; # for ovv_atexit_child
-		my $g2m = $l2m->can('git_to_mail');
-		sub {
-			my ($smsg, $mitem) = @_;
-			$smsg->{pct} = get_pct($mitem) if $mitem;
-			$git->cat_async($smsg->{blob}, $g2m, [ $wcb, $smsg ]);
-		};
 	} elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) {
 		my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},";
 		sub { # DIY prettiness :P
@@ -275,7 +261,9 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 			$lei->out($buf);
 			$buf = '';
 		}
-	} # else { ...
+	} else {
+		die "TODO: unhandled case $self->{fmt}"
+	}
 }
 
 no warnings 'once';
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index c704dc2a..f9250860 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -211,10 +211,10 @@ sub zsfx2cmd ($$$) {
 }
 
 sub _post_augment_mbox { # open a compressor process
-	my ($self, $lei, $zpipe) = @_;
+	my ($self, $lei) = @_;
 	my $zsfx = $self->{zsfx} or return;
 	my $cmd = zsfx2cmd($zsfx, undef, $lei);
-	my ($r, $w) = splice(@$zpipe, 0, 2);
+	my ($r, $w) = @{delete $lei->{zpipe}};
 	my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2} };
 	my $pid = spawn($cmd, $lei->{env}, $rdr);
 	my $pp = gensym;
@@ -407,7 +407,7 @@ sub _pre_augment_mbox {
 			$! == ENOENT or die "unlink($dst): $!";
 		}
 		open my $out, $mode, $dst or die "open($dst): $!";
-		$lei->{old_1} = $lei->{1};
+		$lei->{old_1} = $lei->{1}; # keep for spawning MUA
 		$lei->{1} = $out;
 	}
 	# Perl does SEEK_END even with O_APPEND :<
@@ -418,7 +418,7 @@ sub _pre_augment_mbox {
 	state $zsfx_allow = join('|', keys %zsfx2cmd);
 	($self->{zsfx}) = ($dst =~ /\.($zsfx_allow)\z/) or return;
 	pipe(my ($r, $w)) or die "pipe: $!";
-	[ $r, $w ];
+	$lei->{zpipe} = [ $r, $w ];
 }
 
 sub _do_augment_mbox {
@@ -462,16 +462,24 @@ sub post_augment { # fast (spawn compressor or mkdir), runs in main daemon
 	$self->$m($lei, @args);
 }
 
+sub ipc_atfork_child {
+	my ($self) = @_;
+	my $lei = delete $self->{lei};
+	$lei->lei_atfork_child;
+	if (my $zpipe = delete $lei->{zpipe}) {
+		$lei->{1} = $zpipe->[1];
+		close $zpipe->[0];
+	}
+	$self->{wcb} = $self->write_cb($lei);
+	$self->SUPER::ipc_atfork_child;
+}
+
 sub write_mail { # via ->wq_do
-	my ($self, $git_dir, $smsg, $lei) = @_;
+	my ($self, $git_dir, $smsg) = @_;
 	my $not_done = delete $self->{0} // die 'BUG: $not_done missing';
-	my $wcb = $self->{wcb} //= do { # first message
-		$lei->atfork_child_wq($self);
-		$self->write_cb($lei);
-	};
 	my $git = $self->{"$$\0$git_dir"} //= PublicInbox::Git->new($git_dir);
 	git_async_cat($git, $smsg->{blob}, \&git_to_mail,
-				[$wcb, $smsg, $not_done]);
+				[$self->{wcb}, $smsg, $not_done]);
 }
 
 sub wq_atexit_child {
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index ab66717c..5a9bde64 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -110,8 +110,8 @@ sub wait_startq ($) {
 sub mset_progress {
 	my $lei = shift;
 	return unless $lei->{-progress};
-	if ($lei->{pkt_op}) { # called via pkt_op/pkt_do from workers
-		pkt_do($lei->{pkt_op}, 'mset_progress', @_);
+	if ($lei->{pkt_op_p}) {
+		pkt_do($lei->{pkt_op_p}, 'mset_progress', @_);
 	} else { # single lei-daemon consumer
 		my ($desc, $mset_size, $mset_total_est) = @_;
 		$lei->{-mset_total} += $mset_size;
@@ -122,9 +122,7 @@ sub mset_progress {
 sub query_thread_mset { # for --thread
 	my ($self, $lei, $ibxish) = @_;
 	local $0 = "$0 query_thread_mset";
-	$lei->atfork_child_wq($self);
 	my $startq = delete $lei->{startq};
-
 	my ($srch, $over) = ($ibxish->search, $ibxish->over);
 	my $desc = $ibxish->{inboxdir} // $ibxish->{topdir};
 	return warn("$desc not indexed by Xapian\n") unless ($srch && $over);
@@ -154,9 +152,9 @@ sub query_thread_mset { # for --thread
 }
 
 sub query_mset { # non-parallel for non-"--thread" users
-	my ($self, $lei) = @_;
+	my ($self) = @_;
 	local $0 = "$0 query_mset";
-	$lei->atfork_child_wq($self);
+	my $lei = $self->{lei};
 	my $startq = delete $lei->{startq};
 	my $mo = { %{$lei->{mset_opt}} };
 	my $mset;
@@ -207,10 +205,10 @@ sub kill_reap {
 }
 
 sub query_remote_mboxrd {
-	my ($self, $lei, $uris) = @_;
+	my ($self, $uris) = @_;
 	local $0 = "$0 query_remote_mboxrd";
-	$lei->atfork_child_wq($self);
 	local $SIG{TERM} = sub { exit(0) }; # for DESTROY (File::Temp, $reap)
+	my $lei = $self->{lei};
 	my ($opt, $env) = @$lei{qw(opt env)};
 	my @qform = (q => $lei->{mset_opt}->{qstr}, x => 'm');
 	push(@qform, t => 1) if $opt->{thread};
@@ -307,7 +305,7 @@ sub git {
 	$git;
 }
 
-sub query_done { # EOF callback
+sub query_done { # EOF callback for main daemon
 	my ($lei) = @_;
 	my $has_l2m = exists $lei->{l2m};
 	for my $f (qw(lxs l2m)) {
@@ -332,9 +330,8 @@ Error closing $lei->{ovv}->{dst}: $!
 }
 
 sub do_post_augment {
-	my ($lei, $zpipe, $au_done) = @_;
-	my $l2m = $lei->{l2m} or die 'BUG: no {l2m}';
-	eval { $l2m->post_augment($lei, $zpipe) };
+	my ($lei) = @_;
+	eval { $lei->{l2m}->post_augment($lei) };
 	if (my $err = $@) {
 		if (my $lxs = delete $lei->{lxs}) {
 			$lxs->wq_kill;
@@ -342,7 +339,7 @@ sub do_post_augment {
 		}
 		$lei->fail("$err");
 	}
-	close $au_done; # triggers wait_startq
+	close(delete $lei->{au_done}); # triggers wait_startq
 }
 
 my $MAX_PER_HOST = 4;
@@ -356,13 +353,13 @@ sub concurrency {
 }
 
 sub start_query { # always runs in main (lei-daemon) process
-	my ($self, $io, $lei) = @_;
+	my ($self, $lei) = @_;
 	if ($lei->{opt}->{thread}) {
 		for my $ibxish (locals($self)) {
-			$self->wq_do('query_thread_mset', $io, $lei, $ibxish);
+			$self->wq_do('query_thread_mset', [], $ibxish);
 		}
 	} elsif (locals($self)) {
-		$self->wq_do('query_mset', $io, $lei);
+		$self->wq_do('query_mset', []);
 	}
 	my $i = 0;
 	my $q = [];
@@ -370,19 +367,23 @@ sub start_query { # always runs in main (lei-daemon) process
 		push @{$q->[$i++ % $MAX_PER_HOST]}, $uri;
 	}
 	for my $uris (@$q) {
-		$self->wq_do('query_remote_mboxrd', $io, $lei, $uris);
+		$self->wq_do('query_remote_mboxrd', [], $uris);
 	}
-	@$io = ();
+}
+
+sub ipc_atfork_child {
+	my ($self) = @_;
+	$self->{lei}->lei_atfork_child;
+	$self->SUPER::ipc_atfork_child;
 }
 
 sub query_prepare { # called by wq_do
-	my ($self, $lei) = @_;
+	my ($self) = @_;
 	local $0 = "$0 query_prepare";
-	$lei->atfork_child_wq($self);
-	delete $lei->{l2m}->{-wq_s1};
+	my $lei = $self->{lei};
 	eval { $lei->{l2m}->do_augment($lei) };
 	$lei->fail($@) if $@;
-	pkt_do($lei->{pkt_op}, '.') == 1 or die "do_post_augment trigger: $!"
+	pkt_do($lei->{pkt_op_p}, '.') == 1 or die "do_post_augment trigger: $!"
 }
 
 sub fail_handler ($;$$) {
@@ -401,45 +402,38 @@ sub sigpipe_handler { # handles SIGPIPE from l2m/lxs workers
 
 sub do_query {
 	my ($self, $lei) = @_;
-	$lei->{1}->autoflush(1);
-	$lei->start_pager if -t $lei->{1};
-	$lei->{ovv}->ovv_begin($lei);
-	my ($au_done, $zpipe);
-	my $l2m = $lei->{l2m};
-	$lei->atfork_prepare_wq($self);
-	$self->wq_workers_start('lei_xsearch', $self->{jobs}, $lei->oldset);
-	delete $self->{-ipc_atfork_child_close};
-	if ($l2m) {
-		$lei->atfork_prepare_wq($l2m);
-		$l2m->wq_workers_start('lei2mail', $l2m->{jobs}, $lei->oldset);
-		delete $l2m->{-ipc_atfork_child_close};
-		pipe($lei->{startq}, $au_done) or die "pipe: $!";
-		# 1031: F_SETPIPE_SZ
-		fcntl($lei->{startq}, 1031, 4096) if $^O eq 'linux';
-		$zpipe = $l2m->pre_augment($lei);
-	}
 	my $ops = {
 		'|' => [ \&sigpipe_handler, $lei ],
 		'!' => [ \&fail_handler, $lei ],
-		'.' => [ \&do_post_augment, $lei, $zpipe, $au_done ],
+		'.' => [ \&do_post_augment, $lei ],
 		'' => [ \&query_done, $lei ],
 		'mset_progress' => [ \&mset_progress, $lei ],
 		'x_it' => [ $lei->can('x_it'), $lei ],
 		'child_error' => [ $lei->can('child_error'), $lei ],
 	};
-	(my $op, $lei->{pkt_op}) = PublicInbox::PktOp->pair($ops);
-	my ($lei_ipc, @io) = $lei->atfork_parent_wq($self);
-	delete($lei->{pkt_op});
-
-	$lei->event_step_init; # wait for shutdowns
+	($lei->{pkt_op_c}, $lei->{pkt_op_p}) = PublicInbox::PktOp->pair($ops);
+	$lei->{1}->autoflush(1);
+	$lei->start_pager if -t $lei->{1};
+	$lei->{ovv}->ovv_begin($lei);
+	my $l2m = $lei->{l2m};
 	if ($l2m) {
-		$self->wq_do('query_prepare', \@io, $lei_ipc);
-		$io[1] = $zpipe->[1] if $zpipe;
+		$l2m->pre_augment($lei);
+		$l2m->wq_workers_start('lei2mail', $l2m->{jobs},
+					$lei->oldset, { lei => $lei });
+		pipe($lei->{startq}, $lei->{au_done}) or die "pipe: $!";
+		# 1031: F_SETPIPE_SZ
+		fcntl($lei->{startq}, 1031, 4096) if $^O eq 'linux';
 	}
-	start_query($self, \@io, $lei_ipc);
-	$self->wq_close(1);
+	$self->wq_workers_start('lei_xsearch', $self->{jobs},
+				$lei->oldset, { lei => $lei });
+	my $op = delete $lei->{pkt_op_c};
+	delete $lei->{pkt_op_p};
+	$l2m->wq_close(1) if $l2m;
+	$lei->event_step_init; # wait for shutdowns
+	$self->wq_do('query_prepare', []) if $l2m;
+	start_query($self, $lei);
+	$self->wq_close(1); # lei_xsearch workers stop when done
 	if ($lei->{oneshot}) {
-		# for the $lei_ipc->atfork_child_wq PIPE handler:
 		while ($op->{sock}) { $op->event_step }
 	}
 }

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/6] lei q: only start pager if output is to stdout
  2021-02-04  2:12 [PATCH 1/6] lei q: delay worker spawn Eric Wong
  2021-02-04  2:12 ` [PATCH 2/6] ipc: localize fields assignment to prevent circular refs Eric Wong
  2021-02-04  2:12 ` [PATCH 3/6] lei q: reorder internals to reduce FD passing Eric Wong
@ 2021-02-04  2:12 ` Eric Wong
  2021-02-04  2:12 ` [PATCH 5/6] lei q: reinstate early MUA spawn for Maildir Eric Wong
  2021-02-04  2:12 ` [PATCH 6/6] eml: handle warning ignores for lei Eric Wong
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2021-02-04  2:12 UTC (permalink / raw)
  To: spew

No need to be starting a pager if we're writing to a regular
file.
---
 lib/PublicInbox/LeiOverview.pm | 3 +--
 lib/PublicInbox/LeiXSearch.pm  | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index e6bf4f2a..3125f015 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -78,9 +78,8 @@ sub new {
 	if ($fmt =~ /\A($JSONL|(?:concat)?json)\z/) {
 		$json = $self->{json} = ref(PublicInbox::Config->json);
 	}
-	my ($isatty, $seekable);
 	if ($dst eq '/dev/stdout') {
-		$isatty = -t $lei->{1};
+		my $isatty = $lei->{need_pager} = -t $lei->{1};
 		$opt->{pretty} //= $isatty;
 		if (!$isatty && -f _) {
 			my $fl = fcntl($lei->{1}, F_GETFL, 0) //
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 5a9bde64..15317b9e 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -413,7 +413,7 @@ sub do_query {
 	};
 	($lei->{pkt_op_c}, $lei->{pkt_op_p}) = PublicInbox::PktOp->pair($ops);
 	$lei->{1}->autoflush(1);
-	$lei->start_pager if -t $lei->{1};
+	$lei->start_pager if delete $lei->{need_pager};
 	$lei->{ovv}->ovv_begin($lei);
 	my $l2m = $lei->{l2m};
 	if ($l2m) {

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 5/6] lei q: reinstate early MUA spawn for Maildir
  2021-02-04  2:12 [PATCH 1/6] lei q: delay worker spawn Eric Wong
                   ` (2 preceding siblings ...)
  2021-02-04  2:12 ` [PATCH 4/6] lei q: only start pager if output is to stdout Eric Wong
@ 2021-02-04  2:12 ` Eric Wong
  2021-02-04  2:12 ` [PATCH 6/6] eml: handle warning ignores for lei Eric Wong
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2021-02-04  2:12 UTC (permalink / raw)
  To: spew

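For lock-free destinations such as Maildir, the MUA can be spawned
as soon as the query starts instead of after all results are written.
Once all files are written, we can use utime() to poke Maildirs
to wake up MUAs that fail to account for nanosecond timestamp
resolution.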
---
 lib/PublicInbox/LEI.pm        |  1 +
 lib/PublicInbox/LeiToMail.pm  | 13 +++++++++++++
 lib/PublicInbox/LeiXSearch.pm | 15 +++++++++------
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 0d4b1c11..24efb494 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -739,6 +739,7 @@ sub start_mua {
 	} elsif ($self->{oneshot}) {
 		$self->{"mua.pid.$self.$$"} = spawn(\@cmd);
 	}
+	delete $self->{-progress};
 }
 
 # caller needs to "-t $self->{1}" to check if tty
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index f9250860..5a6f18fb 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -365,6 +365,7 @@ sub new {
 	} else {
 		die "bad mail --format=$fmt\n";
 	}
+	$self->{dst} = $dst;
 	$lei->{dedupe} = PublicInbox::LeiDedupe->new($lei);
 	$self;
 }
@@ -474,6 +475,18 @@ sub ipc_atfork_child {
 	$self->SUPER::ipc_atfork_child;
 }
 
+sub lock_free {
+	$_[0]->{base_type} =~ /\A(?:maildir|mh|imap|jmap)\z/ ? 1 : 0;
+}
+
+sub poke_dst {
+	my ($self) = @_;
+	if ($self->{base_type} eq 'maildir') {
+		my $t = time + 1;
+		utime($t, $t, "$self->{dst}/cur");
+	}
+}
+
 sub write_mail { # via ->wq_do
 	my ($self, $git_dir, $smsg) = @_;
 	my $not_done = delete $self->{0} // die 'BUG: $not_done missing';
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 15317b9e..87414140 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -307,13 +307,13 @@ sub git {
 
 sub query_done { # EOF callback for main daemon
 	my ($lei) = @_;
-	my $has_l2m = exists $lei->{l2m};
-	for my $f (qw(lxs l2m)) {
-		my $wq = delete $lei->{$f} or next;
-		$wq->wq_wait_old($lei);
+	my $l2m = delete $lei->{l2m};
+	$l2m->wq_wait_old($lei) if $l2m;
+	if (my $lxs = delete $lei->{lxs}) {
+		$lxs->wq_wait_old($lei);
 	}
 	$lei->{ovv}->ovv_end($lei);
-	if ($has_l2m) { # close() calls LeiToMail reap_compress
+	if ($l2m) { # close() calls LeiToMail reap_compress
 		if (my $out = delete $lei->{old_1}) {
 			if (my $mbout = $lei->{1}) {
 				close($mbout) or return $lei->fail(<<"");
@@ -322,7 +322,7 @@ Error closing $lei->{ovv}->{dst}: $!
 			}
 			$lei->{1} = $out;
 		}
-		$lei->start_mua;
+		$l2m->lock_free ? $l2m->poke_dst : $lei->start_mua;
 	}
 	$lei->{-progress} and
 		$lei->err('# ', $lei->{-mset_total} // 0, " matches");
@@ -354,6 +354,9 @@ sub concurrency {
 
 sub start_query { # always runs in main (lei-daemon) process
 	my ($self, $lei) = @_;
+	if (my $l2m = $lei->{l2m}) {
+		$lei->start_mua if $l2m->lock_free;
+	}
 	if ($lei->{opt}->{thread}) {
 		for my $ibxish (locals($self)) {
 			$self->wq_do('query_thread_mset', [], $ibxish);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 6/6] eml: handle warning ignores for lei
  2021-02-04  2:12 [PATCH 1/6] lei q: delay worker spawn Eric Wong
                   ` (3 preceding siblings ...)
  2021-02-04  2:12 ` [PATCH 5/6] lei q: reinstate early MUA spawn for Maildir Eric Wong
@ 2021-02-04  2:12 ` Eric Wong
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2021-02-04  2:12 UTC (permalink / raw)
  To: spew

There's nothing we can do about bad emails in our search
results, so quiet things down.
---
 lib/PublicInbox/Admin.pm         |  7 +++----
 lib/PublicInbox/Eml.pm           | 19 +++++++++++++++++++
 lib/PublicInbox/InboxWritable.pm | 24 +-----------------------
 lib/PublicInbox/LeiToMail.pm     |  1 +
 lib/PublicInbox/Watch.pm         | 14 ++++++--------
 5 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index f96397ea..3b38a5a3 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -10,6 +10,7 @@ our @EXPORT_OK = qw(setup_signals);
 use PublicInbox::Config;
 use PublicInbox::Inbox;
 use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Eml;
 *rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed;
 
 sub setup_signals {
@@ -241,12 +242,10 @@ sub index_inbox {
 	}
 	local %SIG = %SIG;
 	setup_signals(\&index_terminate, $ibx);
-	my $warn_cb = $SIG{__WARN__} // \&CORE::warn;
 	my $idx = { current_info => $ibx->{inboxdir} };
-	my $warn_ignore = PublicInbox::InboxWritable->can('warn_ignore');
 	local $SIG{__WARN__} = sub {
-		return if $warn_ignore->(@_);
-		$warn_cb->($idx->{current_info}, ': ', @_);
+		return if PublicInbox::Eml::warn_ignore(@_);
+		warn($idx->{current_info}, ': ', @_);
 	};
 	if (ref($ibx) && $ibx->version == 2) {
 		eval { require PublicInbox::V2Writable };
diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm
index bd27f19b..f7f62e7b 100644
--- a/lib/PublicInbox/Eml.pm
+++ b/lib/PublicInbox/Eml.pm
@@ -477,6 +477,25 @@ sub charset_set {
 
 sub crlf { $_[0]->{crlf} // "\n" }
 
+# warnings to ignore when handling spam mailboxes and maybe other places
+sub warn_ignore {
+	my $s = "@_";
+	# Email::Address::XS warnings
+	$s =~ /^Argument contains empty address at /
+	|| $s =~ /^Element at index [0-9]+ contains /
+	# PublicInbox::MsgTime
+	|| $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/
+	|| $s =~ /^bad Date: .+? in /
+	# Encode::Unicode::UTF7
+	|| $s =~ /^Bad UTF7 data escape at /
+}
+
+# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..."
+sub warn_ignore_cb {
+	my $cb = $SIG{__WARN__} // \&CORE::warn;
+	sub { $cb->(@_) unless warn_ignore(@_) }
+}
+
 sub willneed { re_memo($_) for @_ }
 
 willneed(qw(From To Cc Date Subject Content-Type In-Reply-To References
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 982ad6e5..3a4012cd 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -9,7 +9,7 @@ use parent qw(PublicInbox::Inbox Exporter);
 use PublicInbox::Import;
 use PublicInbox::Filter::Base qw(REJECT);
 use Errno qw(ENOENT);
-our @EXPORT_OK = qw(eml_from_path warn_ignore_cb);
+our @EXPORT_OK = qw(eml_from_path);
 
 use constant {
 	PERM_UMASK => 0,
@@ -277,28 +277,6 @@ sub cleanup ($) {
 	delete @{$_[0]}{qw(over mm git search)};
 }
 
-# warnings to ignore when handling spam mailboxes and maybe other places
-sub warn_ignore {
-	my $s = "@_";
-	# Email::Address::XS warnings
-	$s =~ /^Argument contains empty address at /
-	|| $s =~ /^Element at index [0-9]+ contains /
-	# PublicInbox::MsgTime
-	|| $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/
-	|| $s =~ /^bad Date: .+? in /
-	# Encode::Unicode::UTF7
-	|| $s =~ /^Bad UTF7 data escape at /
-}
-
-# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..."
-sub warn_ignore_cb {
-	my $cb = $SIG{__WARN__} // \&CORE::warn;
-	sub {
-		return if warn_ignore(@_);
-		$cb->(@_);
-	}
-}
-
 # v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove
 sub git_dir_latest {
 	my ($self, $max) = @_;
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 5a6f18fb..1f815e40 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -472,6 +472,7 @@ sub ipc_atfork_child {
 		close $zpipe->[0];
 	}
 	$self->{wcb} = $self->write_cb($lei);
+	$SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb();
 	$self->SUPER::ipc_atfork_child;
 }
 
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 2b44ba43..185e5da8 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -7,7 +7,7 @@ package PublicInbox::Watch;
 use strict;
 use v5.10.1;
 use PublicInbox::Eml;
-use PublicInbox::InboxWritable qw(eml_from_path warn_ignore_cb);
+use PublicInbox::InboxWritable qw(eml_from_path);
 use PublicInbox::Filter::Base qw(REJECT);
 use PublicInbox::Spamcheck;
 use PublicInbox::Sigfd;
@@ -174,7 +174,7 @@ sub _remove_spam {
 	# path must be marked as (S)een
 	$path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return;
 	my $eml = eml_from_path($path) or return;
-	local $SIG{__WARN__} = warn_ignore_cb();
+	local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb();
 	$self->{pi_cfg}->each_inbox(\&remove_eml_i, $self, $eml, $path);
 }
 
@@ -414,13 +414,11 @@ sub imap_import_msg ($$$$$) {
 			import_eml($self, $ibx, $eml);
 		}
 	} elsif ($inboxes eq 'watchspam') {
-		# we don't remove unseen messages
-		if ($flags =~ /\\Seen\b/) {
-			local $SIG{__WARN__} = warn_ignore_cb();
-			my $eml = PublicInbox::Eml->new($raw);
-			$self->{pi_cfg}->each_inbox(\&remove_eml_i,
+		return if $flags !~ /\\Seen\b/; # don't remove unseen messages
+		local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb();
+		my $eml = PublicInbox::Eml->new($raw);
+		$self->{pi_cfg}->each_inbox(\&remove_eml_i,
 						$self, $eml, "$url UID:$uid");
-		}
 	} else {
 		die "BUG: destination unknown $inboxes";
 	}

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-02-04  2:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-04  2:12 [PATCH 1/6] lei q: delay worker spawn Eric Wong
2021-02-04  2:12 ` [PATCH 2/6] ipc: localize fields assignment to prevent circular refs Eric Wong
2021-02-04  2:12 ` [PATCH 3/6] lei q: reorder internals to reduce FD passing Eric Wong
2021-02-04  2:12 ` [PATCH 4/6] lei q: only start pager if output is to stdout Eric Wong
2021-02-04  2:12 ` [PATCH 5/6] lei q: reinstate early MUA spawn for Maildir Eric Wong
2021-02-04  2:12 ` [PATCH 6/6] eml: handle warning ignores for lei Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).