about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2024-03-11 19:40:09 +0000
committerEric Wong <e@80x24.org>2024-03-12 06:18:15 +0000
commit8d6a50ff2a4412e66731cbad36b2c33c18e6a89e (patch)
tree193e68c6320fe684337dcab0e62a4aa5d996eeac
parentcdf70291cfe96fa1deb081873d156e8c31c70938 (diff)
downloadpublic-inbox-8d6a50ff2a4412e66731cbad36b2c33c18e6a89e.tar.gz
Wrap the entire solver command chain with a dedicated limiter.
The normal limiter is designed for longer-lived commands or ones
which serve a single HTTP request (e.g. git-http-backend or
cgit) and not effective for short memory + CPU intensive commands
used for solver.

Each overall solver request is both memory + CPU intensive: it
spawns several short-lived git processes(*) in addition to a
longer-lived `git cat-file --batch' process.

Thus running parallel solvers from a single -netd/-httpd worker
(which have their own parallelization) results in excessive
parallelism that is both memory and CPU-bound (not network-bound)
and cascade into slowdowns for handling simpler memory/CPU-bound
requests.  Parallel solvers were also responsible for the
increased lifetime and frequency of zombies since the event loop
was too saturated to reap them.

We'll also return 503 on excessive solver queueing, since these
require an FD for the client HTTP(S) socket to be held onto.

(*) git (update-index|apply|ls-files) are all run by solver and
    short-lived
-rw-r--r--lib/PublicInbox/SolverGit.pm15
-rw-r--r--lib/PublicInbox/ViewVCS.pm48
2 files changed, 48 insertions, 15 deletions
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 4e79f750..296e7d17 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -256,6 +256,12 @@ sub update_index_result ($$) {
         next_step($self); # onto do_git_apply
 }
 
+sub qsp_qx ($$$) {
+        my ($self, $qsp, $cb) = @_;
+        $qsp->{qsp_err} = \($self->{-qsp_err} = '');
+        $qsp->psgi_qx($self->{psgi_env}, $self->{limiter}, $cb, $self);
+}
+
 sub prepare_index ($) {
         my ($self) = @_;
         my $patches = $self->{patches};
@@ -284,9 +290,8 @@ sub prepare_index ($) {
         my $cmd = [ qw(git update-index -z --index-info) ];
         my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr);
         $path_a = git_quote($path_a);
-        $qsp->{qsp_err} = \($self->{-qsp_err} = '');
         $self->{-msg} = "index prepared:\n$mode_a $oid_full\t$path_a";
-        $qsp->psgi_qx($self->{psgi_env}, undef, \&update_index_result, $self);
+        qsp_qx $self, $qsp, \&update_index_result;
 }
 
 # pure Perl "git init"
@@ -465,8 +470,7 @@ sub apply_result ($$) { # qx_cb
         my @cmd = qw(git ls-files -s -z);
         my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env});
         $self->{-cur_di} = $di;
-        $qsp->{qsp_err} = \($self->{-qsp_err} = '');
-        $qsp->psgi_qx($self->{psgi_env}, undef, \&ls_files_result, $self);
+        qsp_qx $self, $qsp, \&ls_files_result;
 }
 
 sub do_git_apply ($) {
@@ -495,8 +499,7 @@ sub do_git_apply ($) {
         my $opt = { 2 => 1, -C => _tmp($self)->dirname, quiet => 1 };
         my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $opt);
         $self->{-cur_di} = $di;
-        $qsp->{qsp_err} = \($self->{-qsp_err} = '');
-        $qsp->psgi_qx($self->{psgi_env}, undef, \&apply_result, $self);
+        qsp_qx $self, $qsp, \&apply_result;
 }
 
 sub di_url ($$) {
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 61329db6..790b9a2c 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -49,6 +49,10 @@ my %GIT_MODE = (
         '160000' => 'g', # commit (gitlink)
 );
 
+# TODO: not fork safe, but we don't fork w/o exec in PublicInbox::WWW
+my (@solver_q, $solver_lim);
+my $solver_nr = 0;
+
 sub html_page ($$;@) {
         my ($ctx, $code) = @_[0, 1];
         my $wcb = delete $ctx->{-wcb};
@@ -614,26 +618,52 @@ sub show_blob { # git->cat_async callback
                 '</code></pre></td></tr></table>'.dbg_log($ctx), @def);
 }
 
-# GET /$INBOX/$GIT_OBJECT_ID/s/
-# GET /$INBOX/$GIT_OBJECT_ID/s/$FILENAME
-sub show ($$;$) {
-        my ($ctx, $oid_b, $fn) = @_;
-        my $hints = $ctx->{hints} = {};
+sub start_solver ($) {
+        my ($ctx) = @_;
         while (my ($from, $to) = each %QP_MAP) {
                 my $v = $ctx->{qp}->{$from} // next;
-                $hints->{$to} = $v if $v ne '';
+                $ctx->{hints}->{$to} = $v if $v ne '';
         }
-        $ctx->{fn} = $fn;
-        $ctx->{-tmp} = File::Temp->newdir("solver.$oid_b-XXXX", TMPDIR => 1);
+        $ctx->{-next_solver} = PublicInbox::OnDestroy->new($$, \&next_solver);
+        ++$solver_nr;
+        $ctx->{-tmp} = File::Temp->newdir("solver.$ctx->{oid_b}-XXXX",
+                                                TMPDIR => 1);
         $ctx->{lh} or open $ctx->{lh}, '+>>', "$ctx->{-tmp}/solve.log";
         my $solver = PublicInbox::SolverGit->new($ctx->{ibx},
                                                 \&solve_result, $ctx);
+        $solver->{limiter} = $solver_lim;
         $solver->{gits} //= [ $ctx->{git} ];
         $solver->{tmp} = $ctx->{-tmp}; # share tmpdir
         # PSGI server will call this immediately and give us a callback (-wcb)
+        $solver->solve(@$ctx{qw(env lh oid_b hints)});
+}
+
+# run the next solver job when done and DESTROY-ed
+sub next_solver {
+        --$solver_nr;
+        # XXX FIXME: client may've disconnected if it waited a long while
+        start_solver(shift(@solver_q) // return);
+}
+
+sub may_start_solver ($) {
+        my ($ctx) = @_;
+        $solver_lim //= $ctx->{www}->{pi_cfg}->limiter('codeblob');
+        if ($solver_nr >= $solver_lim->{max}) {
+                @solver_q > 128 ? html_page($ctx, 503, 'too busy')
+                                : push(@solver_q, $ctx);
+        } else {
+                start_solver($ctx);
+        }
+}
+
+# GET /$INBOX/$GIT_OBJECT_ID/s/
+# GET /$INBOX/$GIT_OBJECT_ID/s/$FILENAME
+sub show ($$;$) {
+        my ($ctx, $oid_b, $fn) = @_;
+        @$ctx{qw(oid_b fn)} = ($oid_b, $fn);
         sub {
                 $ctx->{-wcb} = $_[0]; # HTTP write callback
-                $solver->solve($ctx->{env}, $ctx->{lh}, $oid_b, $hints);
+                may_start_solver $ctx;
         };
 }