From a4df292ed39f6e02a7d57326f291583339cb562d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 2 Mar 2017 03:36:24 +0000 Subject: repobrowse: rework source view to use async cat-file API This will allow most source files to be displayed without blocking public-inbox-httpd on slow disk access. However, we no longer support displaying source files larger than 65536 bytes (the size of a pipe on current Linux). --- lib/PublicInbox/Git.pm | 9 +++ lib/PublicInbox/RepoGitTree.pm | 163 ++++++++++++++++++++++++----------------- 2 files changed, 104 insertions(+), 68 deletions(-) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 3ca0b680..aba4616e 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -283,6 +283,15 @@ sub check_async { } } +sub cat_async { + my ($self, $env, $obj, $cb) = @_; + if ($env->{'pi-httpd.async'}) { + cat_async_ds($self, $obj, $cb); + } else { + cat_async_compat($self, $obj, $cb); + } +} + 1; __END__ =pod diff --git a/lib/PublicInbox/RepoGitTree.pm b/lib/PublicInbox/RepoGitTree.pm index 3f9cd716..e8c34a69 100644 --- a/lib/PublicInbox/RepoGitTree.pm +++ b/lib/PublicInbox/RepoGitTree.pm @@ -16,16 +16,17 @@ my %GIT_MODE = ( ); my $BINARY_MSG = "Binary file, save using the 'raw' link above"; +my $MAX_ASYNC = 65536; # same as pipe size on Linux +my $BIN_DETECT = 8000; # same as git (buffer_is_binary in git.git) sub call_git_tree { my ($self, $req) = @_; my $repo = $req->{-repo}; my $git = $repo->{git}; - my $tip = $req->{tip} || $repo->tip; - my $obj = "$tip:$req->{expath}"; + my $tip = $req->{tip} || $req->{repo}->tip; sub { my ($res) = @_; - $git->check_async($req->{env}, $obj, sub { + $git->check_async($req->{env}, "$tip:$req->{expath}", sub { my ($info) = @_; my ($hex, $type, $size) = @$info; unless (defined $type) { @@ -42,18 +43,12 @@ sub show_tree { my ($self, $req, $res, $hex, $type, $size) = @_; my $opts = { nofollow => 1 }; my $title = "tree: ".utf8_html($req->{expath}); + $req->{thtml} = $self->html_start($req, $title, $opts) . "\n"; if ($type eq 'tree') { $opts->{noindex} = 1; - $req->{thtml} = $self->html_start($req, $title, $opts) . "\n"; git_tree_show($req, $res, $hex); } elsif ($type eq 'blob') { - my $fh = $res->([200, - ['Content-Type','text/html; charset=UTF-8']]); - $fh->write($self->html_start($req, $title, $opts) . - "\n"); - git_blob_show($req, $fh,$hex); - $fh->write(''); - $fh->close; + git_blob_show($req, $res, $hex, $size); } else { $res->([404, ['Content-Type', 'text/plain; charset=UTF-8'], ["Unrecognized type ($type) for $hex\n"]]); @@ -64,8 +59,7 @@ sub cur_path { my ($req) = @_; my @ex = @{$req->{extra}} or return 'root'; my $s; - - my $tip = $req->{-repo}->tip; + my $tip = $req->{tip} || $req->{repo}->tip; my $rel = $req->{relcmd}; # avoid relative paths, here, we don't want to propagate # trailing-slash URLs although we tolerate them @@ -81,69 +75,102 @@ sub cur_path { } @ex), ''.utf8_html($cur).''); } -sub git_blob_show { - my ($req, $fh, $hex) = @_; - # ref: buffer_is_binary in git.git - my $to_read = 8000; # git uses this size to detect binary files - my $text_p; - my $n = 0; - my $git = $req->{-repo}->{git}; - +sub git_blob_sed ($$$) { + my ($req, $hex, $size) = @_; + my $pfx = $req->{tpfx}; + my $nl = 0; + my $bytes = 0; + my @lines; + my $buf = ''; my $rel = $req->{relcmd}; - my $raw = join('/', - "${rel}raw", $req->{-repo}->tip, @{$req->{extra}}); + my $tip = $req->{tip} || $req->{repo}->tip; + my $raw = join('/', "${rel}raw", $tip, @{$req->{extra}}); $raw = PublicInbox::Hval->utf8($raw)->as_path; my $t = cur_path($req); - my $s = qq{\npath: $t\n\nblob $hex}; my $end = ''; + $req->{thtml} .= qq{\npath: $t\n\nblob $hex} . + qq{\t$size bytes (raw)}; + $req->{lstart} = '
';
+	my $s;
 
-	$git->cat_file($hex, sub {
-		my ($cat, $left) = @_; # $$left == $size
-		$s .= qq{\t$$left bytes (raw)};
-		$to_read = $$left if $to_read > $$left;
-		my $r = read($cat, my $buf, $to_read);
-		return unless defined($r) && $r > 0;
-		$$left -= $r;
-
-		if (index($buf, "\0") >= 0) {
-			$fh->write("$s\n$BINARY_MSG
"); - return; - } - $fh->write($s."
");
-		$text_p = 1;
-
-		while (1) {
-			my @buf = split(/\r?\n/, $buf, -1);
-			$buf = pop @buf; # last line, careful...
-			foreach my $l (@buf) {
-				++$n;
-				$fh->write("". utf8_html($l).
-						"\n");
-			}
-			# no trailing newline:
-			if ($$left == 0 && $buf ne '') {
-				++$n;
-				$buf = utf8_html($buf);
-				$fh->write("". $buf ."");
-				$end = '
\ No newline at end of file
'; - last; + sub { + my $dst = delete $req->{thtml} || ''; + if (defined $_[0]) { + return '' if $bytes < 0; # binary + if ($bytes <= $BIN_DETECT) { + if (index($_[0], "\0") >= 0) { + $bytes = -1; + $s = delete $req->{lstart} and + $dst .= $s; + $dst .= "\n"; + $dst .= $BINARY_MSG; + return $dst .= '
'; + } } - - last unless defined($buf); - - $to_read = $$left if $to_read > $$left; - my $off = length $buf; # last line from previous read - $r = read($cat, $buf, $to_read, $off); - return unless defined($r) && $r > 0; - $$left -= $r; + $bytes += bytes::length($_[0]); + $buf .= $_[0]; + $_[0] = ''; # save some memory + $s = delete $req->{lstart} and $dst .= $s; + @lines = split(/\r?\n/, $buf, -1); + $buf = pop @lines; # last line, careful... + } else { # EOF + $s = delete $req->{lstart} and $dst .= $s; + @lines = split(/\r?\n/, $buf, -1); + $buf = pop @lines; + $end .= ''; } - 0; - }); + foreach (@lines) { + ++$nl; + $dst .= ""; + $dst .= sprintf("% 5u\t", $nl); + $dst .= utf8_html($_); + $dst .= "\n"; + } + @lines = (); + if ($end && defined $buf && $buf ne '') { + ++$nl; + $dst .= ""; + $dst .= sprintf("% 5u\t", $nl); + $dst .= utf8_html($buf); + $buf = undef; + $dst .= "\n\\ No newline at end of file"; + } + $dst .= $end; + } +} - # line numbers go in a second column: - $fh->write('
');
-	$fh->write(qq($_\n)) foreach (1..$n);
-	$fh->write("

$end"); +sub git_blob_show { + my ($req, $res, $hex, $size) = @_; + my $sed = git_blob_sed($req, $hex, $size); + my $git = $req->{-repo}->{git}; + if ($size <= $MAX_ASYNC) { + my $buf = ''; # we slurp small files + $git->cat_async($req->{env}, $hex, sub { + my ($r) = @_; + my $ref = ref($r); + return if $ref eq 'ARRAY'; # redundant info + if ($ref eq 'SCALAR') { + $buf .= $$r; + if (bytes::length($buf) == $size) { + my $fh = $res->([200, + ['Content-Type', + 'text/html; charset=UTF-8']]); + $fh->write($sed->($buf)); + $fh->write($sed->(undef)); + $fh->close; + } + return; + } + my $cb = $res or return; + $res = undef; + $cb->([500, + ['Content-Type', 'text/plain; charset=UTF-8'], + [ 'Error' ]]); + }); + } else { + $res->([200, ['Content-Type', 'text/plain; charset=UTF-8'], + [ 'Too big' ]]); + } } sub git_tree_sed ($) { -- cgit v1.2.3-24-ge0c7