dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH 0/3] git HTTP serving improvements
@ 2016-04-29  3:32 Eric Wong
  2016-04-29  3:32 ` [PATCH 1/3] http: improve error handling for aborted responses Eric Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Eric Wong @ 2016-04-29  3:32 UTC (permalink / raw)
  To: spew

This series is still a work-in-progress as it is missing tests,
but giant static files over HTTP should NEVER be buffered
in memory.  Pack generation is currently a problem, however,
since git can be damn fast and clients damn slow.

Eric Wong (3):
      http: improve error handling for aborted responses
      githttpbackend: use larger read buffers for pipes and files
      githttpbackend: memory usage fix for large static files

 lib/PublicInbox/GitHTTPBackend.pm | 108 ++++++++++++++++++++++++++++----------
 lib/PublicInbox/HTTP.pm           |   3 ++
 t/githttpbackend.psgi             |  24 +++++++++
 3 files changed, 107 insertions(+), 28 deletions(-)


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] http: improve error handling for aborted responses
  2016-04-29  3:32 [PATCH 0/3] git HTTP serving improvements Eric Wong
@ 2016-04-29  3:32 ` Eric Wong
  2016-04-29  3:32 ` [PATCH 2/3] githttpbackend: use larger read buffers for pipes and files Eric Wong
  2016-04-29  3:32 ` [PATCH 3/3] githttpbackend: memory usage fix for large static files Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2016-04-29  3:32 UTC (permalink / raw)
  To: spew

We need to abort connections properly if a response is prematurely
truncated.  This includes problems with serving static files, since
a clumsy admin or broken FS could return truncated responses and
inadvertently leave a client waiting (since the client saw
"Content-Length" in the header and expected a certain length).
---
 lib/PublicInbox/GitHTTPBackend.pm | 19 ++++++++++++++-----
 lib/PublicInbox/HTTP.pm           |  3 +++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index a7cac10..456671d 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -95,6 +95,7 @@ sub serve_dumb {
 			$len -= $r;
 			$fh->write($buf);
 		}
+		die "$f truncated with $len bytes remaining\n" if $len;
 		$fh->close;
 	}
 }
@@ -191,14 +192,21 @@ sub serve_smart {
 			$fh = undef;
 		}
 		if ($rpipe) {
-			$rpipe->close; # _may_ be Danga::Socket::close
+			# _may_ be Danga::Socket::close via
+			# PublicInbox::HTTPD::Async::close:
+			$rpipe->close;
 			$rpipe = undef;
 			$nr_running--;
 		}
-		if (defined $pid && $pid != waitpid($pid, 0)) {
-			$err->print("git http-backend ($git_dir): $?\n");
-		} else {
-			$pid = undef;
+		if (defined $pid) {
+			my $e = $pid == waitpid($pid, 0) ?
+				$? : "PID:$pid still running?";
+			if ($e) {
+				$err->print("http-backend ($git_dir): $e\n");
+				if (my $io = $env->{'psgix.io'}) {
+					$io->close;
+				}
+			}
 		}
 		return unless $res;
 		my $dumb = serve_dumb($cgi, $git, $path);
@@ -245,6 +253,7 @@ sub serve_smart {
 		} # else { keep reading ... }
 	};
 	if (my $async = $env->{'pi-httpd.async'}) {
+		# $async is PublicInbox::HTTPD::Async->new($rpipe, $cb)
 		$rpipe = $async->($rpipe, $cb);
 		sub { ($res) = @_ } # let Danga::Socket handle the rest.
 	} else { # synchronous loop for other PSGI servers
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index 3934512..2f50241 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -135,6 +135,9 @@ sub app_dispatch ($) {
 	sysseek($env->{'psgi.input'}, 0, SEEK_SET) or
 			die "BUG: psgi.input seek failed: $!";
 
+	# note: not $self->{sock}, we want Danga::Socket::close,
+	# not the underlying close.
+	$env->{'psgix.io'} = $self; # only for ->close
 	my $res = Plack::Util::run_app($self->{httpd}->{app}, $env);
 	eval {
 		if (ref($res) eq 'CODE') {
-- 
EW


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] githttpbackend: use larger read buffers for pipes and files
  2016-04-29  3:32 [PATCH 0/3] git HTTP serving improvements Eric Wong
  2016-04-29  3:32 ` [PATCH 1/3] http: improve error handling for aborted responses Eric Wong
@ 2016-04-29  3:32 ` Eric Wong
  2016-04-29  3:32 ` [PATCH 3/3] githttpbackend: memory usage fix for large static files Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2016-04-29  3:32 UTC (permalink / raw)
  To: spew

These buffers are short-lived and we do not do any processing on
them.  Thus we can we can afford bigger buffers to match the
current pipe capacity under Linux 4.x for large reads from git.

We maintain 8K buffers for HTTP socket reads, as we expect
all of our HTTP requests to be smaller than that.
---
 lib/PublicInbox/GitHTTPBackend.pm | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 456671d..70cd560 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -87,7 +87,7 @@ sub serve_dumb {
 		my ($res) = @_; # Plack callback
 		my $fh = $res->([ $code, \@h ]);
 		my $buf;
-		my $n = 8192;
+		my $n = 65536;
 		while ($len > 0) {
 			$n = $len if $len < $n;
 			my $r = sysread($in, $buf, $n);
@@ -222,8 +222,9 @@ sub serve_smart {
 		$end->();
 		$err->print("git http-backend ($git_dir): $e\n");
 	};
+	my $n = 4096;
 	my $cb = sub { # read git-http-backend output and stream to client
-		my $r = $rpipe ? $rpipe->sysread($buf, 8192, length($buf)) : 0;
+		my $r = $rpipe ? $rpipe->sysread($buf, $n, length($buf)) : 0;
 		return $fail->($!{EAGAIN} ? 'EAGAIN' : $!) unless defined $r;
 		return $end->() if $r == 0; # EOF
 		if ($fh) { # stream body from git-http-backend to HTTP client
@@ -248,6 +249,7 @@ sub serve_smart {
 				$fh = $res->([ $code, \@h ]);
 				$res = undef;
 				$fh->write($buf);
+				$n = 65536; # done parsing, stream more at once
 			}
 			$buf = '';
 		} # else { keep reading ... }
@@ -272,7 +274,7 @@ sub input_to_file {
 	my $input = $env->{'psgi.input'};
 	my $buf;
 	while (1) {
-		my $r = $input->read($buf, 8192);
+		my $r = $input->read($buf, 65536);
 		unless (defined $r) {
 			my $err = $env->{'psgi.errors'};
 			$err->print("error reading input: $!\n");
-- 
EW


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] githttpbackend: memory usage fix for large static files
  2016-04-29  3:32 [PATCH 0/3] git HTTP serving improvements Eric Wong
  2016-04-29  3:32 ` [PATCH 1/3] http: improve error handling for aborted responses Eric Wong
  2016-04-29  3:32 ` [PATCH 2/3] githttpbackend: use larger read buffers for pipes and files Eric Wong
@ 2016-04-29  3:32 ` Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2016-04-29  3:32 UTC (permalink / raw)
  To: spew

When serving large static files, Danga::Socket::write may queue
up callbacks and defer firing them until the socket is writable.
This prevents us from scheduling writes or buffering until we
know the socket is writable.  This prevents needless buffering
by Danga::Socket.

TODO: implement this for smart HTTP, too, but that would
throttle pack generation; so maybe not a good idea?
---
 lib/PublicInbox/GitHTTPBackend.pm | 79 ++++++++++++++++++++++++++-------------
 t/githttpbackend.psgi             | 24 ++++++++++++
 2 files changed, 77 insertions(+), 26 deletions(-)
 create mode 100644 t/githttpbackend.psgi

diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 70cd560..76bcfba 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -50,6 +50,17 @@ sub serve {
 	serve_dumb($cgi, $git, $path);
 }
 
+sub err ($@) {
+	my ($env, @msg) = @_;
+	$env->{'psgi.errors'}->print(@msg, "\n");
+}
+
+sub drop_client ($) {
+	if (my $io = $_[0]->{'psgix.io'}) {
+		$io->close;
+	}
+}
+
 sub serve_dumb {
 	my ($cgi, $git, $path) = @_;
 
@@ -61,18 +72,42 @@ sub serve_dumb {
 	} else {
 		return r(404);
 	}
+
 	my $f = "$git->{git_dir}/$path";
-	return r(404) unless -f $f && -r _;
+	return r(404) unless -f $f && -r _; # just in case it's a FIFO :P
 	my @st = stat(_);
 	my $size = $st[7];
+	my $env = $cgi->{env};
 
-	# TODO: If-Modified-Since and Last-Modified
+	# TODO: If-Modified-Since and Last-Modified?
 	open my $in, '<', $f or return r(404);
-	my $code = 200;
 	my $len = $size;
-	my @h;
+	my $n = 65536;
+	my ($next, $fh);
+	my $cb = sub {
+		$n = $len if $len < $n;
+		my $r = sysread($in, my $buf, $n);
+		if (!defined $r) {
+			err($env, "$f read error: $!");
+			drop_client($env);
+		} elsif ($r <= 0) {
+			err($env, "$f EOF with $len bytes left");
+			drop_client($env);
+		} else {
+			$len -= $r;
+			$fh->write($buf);
+			if ($len == 0) {
+				$fh->close;
+			} elsif ($next) {
+				# avoid infinite recursion
+				return Danga::Socket->AddTimer(0, $next);
+			}
+		}
+		$fh = $next = undef;
+	};
 
-	my $env = $cgi->{env};
+	my $code = 200;
+	my @h;
 	my $range = $env->{HTTP_RANGE};
 	if (defined $range && $range =~ /\bbytes=(\d*)-(\d*)\z/) {
 		($code, $len) = prepare_range($cgi, $in, \@h, $1, $2, $size);
@@ -81,22 +116,17 @@ sub serve_dumb {
 			return [ 416, \@h, [] ];
 		}
 	}
-
 	push @h, 'Content-Type', $type, 'Content-Length', $len;
+
 	sub {
 		my ($res) = @_; # Plack callback
-		my $fh = $res->([ $code, \@h ]);
-		my $buf;
-		my $n = 65536;
-		while ($len > 0) {
-			$n = $len if $len < $n;
-			my $r = sysread($in, $buf, $n);
-			last if (!defined($r) || $r <= 0);
-			$len -= $r;
-			$fh->write($buf);
+		$fh = $res->([ $code, \@h ]);
+		if (defined $env->{'pi-httpd.async'}) {
+			$next = sub { $fh->write($cb) };
+			$cb->(); # start it off!
+		} else {
+			$cb->() while $fh;
 		}
-		die "$f truncated with $len bytes remaining\n" if $len;
-		$fh->close;
 	}
 }
 
@@ -158,7 +188,7 @@ sub serve_smart {
 	}
 	my ($rpipe, $wpipe);
 	unless (pipe($rpipe, $wpipe)) {
-		$err->print("error creating pipe: $! - going static\n");
+		err($env, "error creating pipe: $! - going static");
 		return;
 	}
 	my %env = %ENV;
@@ -179,7 +209,7 @@ sub serve_smart {
 	my %rdr = ( 0 => fileno($in), 1 => fileno($wpipe) );
 	my $pid = spawn([qw(git http-backend)], \%env, \%rdr);
 	unless (defined $pid) {
-		$err->print("error spawning: $! - going static\n");
+		err($env, "error spawning: $! - going static");
 		return;
 	}
 	$wpipe = $in = undef;
@@ -202,10 +232,8 @@ sub serve_smart {
 			my $e = $pid == waitpid($pid, 0) ?
 				$? : "PID:$pid still running?";
 			if ($e) {
-				$err->print("http-backend ($git_dir): $e\n");
-				if (my $io = $env->{'psgix.io'}) {
-					$io->close;
-				}
+				err($env, "git http-backend ($git_dir): $e");
+				drop_client($env);
 			}
 		}
 		return unless $res;
@@ -220,7 +248,7 @@ sub serve_smart {
 			return;
 		}
 		$end->();
-		$err->print("git http-backend ($git_dir): $e\n");
+		err($err, "git http-backend ($git_dir): $e\n");
 	};
 	my $n = 4096;
 	my $cb = sub { # read git-http-backend output and stream to client
@@ -276,8 +304,7 @@ sub input_to_file {
 	while (1) {
 		my $r = $input->read($buf, 65536);
 		unless (defined $r) {
-			my $err = $env->{'psgi.errors'};
-			$err->print("error reading input: $!\n");
+			err($env, "error reading input: $!");
 			return;
 		}
 		last if ($r == 0);
diff --git a/t/githttpbackend.psgi b/t/githttpbackend.psgi
new file mode 100644
index 0000000..52b3025
--- /dev/null
+++ b/t/githttpbackend.psgi
@@ -0,0 +1,24 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+use strict;
+use warnings;
+use PublicInbox::GitHTTPBackend;
+use PublicInbox::Git;
+use Plack::Builder;
+use Plack::Request;
+my $git_dir = $ENV{GIANT_GIT_DIR} or die 'GIANT_GIT_DIR not defined in env';
+my $git = PublicInbox::Git->new($git_dir);
+builder {
+	enable 'Chunked' if $ENV{TEST_CHUNK};
+	enable 'Head';
+	sub {
+		my ($env) = @_;
+		my $pr = Plack::Request->new($env);
+		if ($pr->path_info =~ m!\A/(.+)\z!s) {
+			PublicInbox::GitHTTPBackend::serve($pr, $git, $1);
+		} else {
+			[ 404, [ qw(Content-Type text/plain) ], [] ]
+		}
+	}
+}
-- 
EW


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-04-29  3:32 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-04-29  3:32 [PATCH 0/3] git HTTP serving improvements Eric Wong
2016-04-29  3:32 ` [PATCH 1/3] http: improve error handling for aborted responses Eric Wong
2016-04-29  3:32 ` [PATCH 2/3] githttpbackend: use larger read buffers for pipes and files Eric Wong
2016-04-29  3:32 ` [PATCH 3/3] githttpbackend: memory usage fix for large static files Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).