diff options
author | Eric Wong <e@80x24.org> | 2016-04-04 21:09:19 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2016-04-05 23:50:12 +0000 |
commit | 25b49fc37121d8584b84b44b20c910ef43c44950 (patch) | |
tree | 848f28873118519deed9175dd67f8c77410b89d0 /lib/PublicInbox | |
parent | 07ba708d4d52f25f371b72801f0b00458944d634 (diff) | |
download | public-inbox-25b49fc37121d8584b84b44b20c910ef43c44950.tar.gz |
We currently do not display links to snapshots, but may in the future (optionally, like cgit). However, support snapshots for compatibility reasons in case people had cached URLs or auto-generated them somewhere. We won't natively support xz and bzip2 just yet, but will document (at least in comments) how to enable these expensive compression schemes via git-config(1). Also, support disabling certain archive types to thwart URL guessing or old cached links from spiders burning bandwidth. In retrospect, enabling snapshots for my own cgit views was a bad idea since it wastes bandwidth from crawlers and is often not useful for users with maintainer-built files (e.g. "configure" from "configure.ac" for autoconf, where only the latter is stored in git and the former is generated in release tarballs).
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- | lib/PublicInbox/Repobrowse.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/RepobrowseBase.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/RepobrowseConfig.pm | 21 | ||||
-rw-r--r-- | lib/PublicInbox/RepobrowseGitSnapshot.pm | 131 |
4 files changed, 154 insertions, 4 deletions
diff --git a/lib/PublicInbox/Repobrowse.pm b/lib/PublicInbox/Repobrowse.pm index 0c4cf144..0a812f72 100644 --- a/lib/PublicInbox/Repobrowse.pm +++ b/lib/PublicInbox/Repobrowse.pm @@ -25,7 +25,7 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape); use PublicInbox::RepobrowseConfig; my %CMD = map { lc($_) => $_ } qw(Log Commit Tree Patch Blob Plain Tag Atom - Diff); + Diff Snapshot); my %VCS = (git => 'Git'); my %LOADED; diff --git a/lib/PublicInbox/RepobrowseBase.pm b/lib/PublicInbox/RepobrowseBase.pm index 7863d6ce..33647fca 100644 --- a/lib/PublicInbox/RepobrowseBase.pm +++ b/lib/PublicInbox/RepobrowseBase.pm @@ -5,7 +5,7 @@ use strict; use warnings; require PublicInbox::RepobrowseGitQuery; use PublicInbox::Hval; -our %MIME_TYPE_WHITELIST = ( 'application/pdf' => 1 ); +our %MIME_TYPE_WHITELIST = ('application/pdf' => 1); sub new { bless {}, shift } @@ -107,7 +107,7 @@ sub r { # mainly for curl (no-'-L') users: $body = "Redirecting to $redir\n"; } else { - die "not implemented, yet: $status"; + push @h, qw(Content-Type text/plain); } [ $status, \@h, [ $body ] ] diff --git a/lib/PublicInbox/RepobrowseConfig.pm b/lib/PublicInbox/RepobrowseConfig.pm index 703212d9..77ef46bb 100644 --- a/lib/PublicInbox/RepobrowseConfig.pm +++ b/lib/PublicInbox/RepobrowseConfig.pm @@ -11,6 +11,10 @@ sub new { $file = default_file() unless defined($file); my $self = bless PublicInbox::Config::git_config_dump($file), $class; $self->{-cache} = {}; + + # hard disable these with '-' prefix by default: + $self->{'repobrowse.snapshots'} ||= '-tar.bz2 -tar.xz'; + # for root $self->{-groups} = { -hidden => [], -none => [] }; $self; @@ -40,6 +44,12 @@ sub lookup { $rv->{path} = $path; $rv->{repo} = $repo_path; + # snapshots: + my $snap = (split('/', $repo_path))[-1]; + $snap =~ s/\.git\z//; # seems common for git URLs to end in ".git" + $rv->{snapshot_re} = qr/\A\Q$snap\E[-_]/; + $rv->{snapshot_pfx} = $snap; + # gitweb compatibility foreach my $key (qw(description cloneurl)) { $rv->{$key} = 
try_cat("$path/$key"); @@ -48,9 +58,18 @@ sub lookup { $rv->{desc_html} = PublicInbox::Hval->new_oneline($rv->{description})->as_html; - foreach my $key (qw(publicinbox vcs readme group)) { + foreach my $key (qw(publicinbox vcs readme group snapshots)) { $rv->{$key} = $self->{"repo.$repo_path.$key"}; } + unless (defined $rv->{snapshots}) { + $rv->{snapshots} = $self->{'repobrowse.snapshots'} || ''; + } + + my %disabled; + foreach (split(/\s+/, $rv->{snapshots})) { + s/\A-// and $disabled{$_} = 1; + } + $rv->{snapshots_disabled} = \%disabled; my $g = $rv->{group}; defined $g or $g = '-none'; diff --git a/lib/PublicInbox/RepobrowseGitSnapshot.pm b/lib/PublicInbox/RepobrowseGitSnapshot.pm new file mode 100644 index 00000000..106d5651 --- /dev/null +++ b/lib/PublicInbox/RepobrowseGitSnapshot.pm @@ -0,0 +1,131 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# shows the /snapshot/ endpoint for git repositories +# Mainly for compatibility reasons with cgit, I'm unsure if +# showing this in a repository viewer is a good idea. + +package PublicInbox::RepobrowseGitSnapshot; +use strict; +use warnings; +use base qw(PublicInbox::RepobrowseBase); +use PublicInbox::Git; +our $SUFFIX; +BEGIN { + # as described in git-archive(1), users may add support for + # other compression schemes such as xz or bz2 via git-config(1): + # git config tar.tar.xz.command "xz -c" + # git config tar.tar.bz2.command "bzip2 -c" + chomp(my @l = `git archive --list`); + $SUFFIX = join('|', map { quotemeta $_ } @l); +} + +# Not using standard mime types since the compressed tarballs are +# special or do not match my /etc/mime.types. Choose what gitweb +# and cgit agree on for compatibility. 
+our %FMT_TYPES = ( + 'tar' => 'application/x-tar', + 'tar.bz2' => 'application/x-bzip2', + 'tar.gz' => 'application/x-gzip', + 'tar.xz' => 'application/x-xz', + 'tgz' => 'application/x-gzip', + 'zip' => 'application/x-zip', +); + +sub call_git_snapshot ($$) { # invoked by PublicInbox::RepobrowseBase::call + my ($self, $req) = @_; + + my @extra = @{$req->{extra}}; + my $ref = shift @extra; + return $self->r(404) if (!defined $ref) || scalar(@extra); + my $orig_fn = $ref; + + # just in case git changes refname rules, don't allow wonky filenames + # to break the Content-Disposition header, either. + return $self->r(404) if $orig_fn =~ /["\s]/s; + return $self->r(404) unless ($ref =~ s/\.($SUFFIX)\z//o); + my $fmt = $1; + + my $repo_info = $req->{repo_info}; + + # support disabling certain snapshots types entirely to twart + # URL guessing since it could burn server resources. + return $self->r(404) if $repo_info->{snapshots_disabled}->{$fmt}; + + # strip optional basename (may not exist) + $ref =~ s/$repo_info->{snapshot_re}//; + + # don't allow option/command injection, git refs do not start with '-' + return $self->r(404) if $ref =~ /\A-/; + + my $git = $repo_info->{git}; + my $tree; + + # try prefixing "v" or "V" for tag names + foreach my $r ($ref, "v$ref", "V$ref") { + $tree = $git->qx([qw(rev-parse --verify --revs-only), $r], + undef, { 2 => $git->err_begin }); + if (defined $tree) { + chomp $tree; + last if $tree ne ''; + } + } + return $self->r(404) if (!defined $tree || $tree eq ''); + + my $pfx = "$repo_info->{snapshot_pfx}-$ref/"; + my @cmd = ('archive', "--prefix=$pfx", "--format=$fmt", $tree); + $req->{rpipe} = $git->popen(\@cmd, undef, { 2 => $git->err_begin }); + + my $env = $req->{cgi}->env; + my $vin; + my $end = sub { + my ($n) = @_; + if (my $fh = delete $req->{fh}) { + $fh->close; + } elsif (my $res = delete $req->{res}) { + $res->($self->r(500)); + } + if (my $rpipe = delete $req->{rpipe}) { + $rpipe->close; # _may_ be Danga::Socket::close + } + 
}; + my $fail = sub { + if ($!{EAGAIN} || $!{EINTR}) { + select($vin, undef, undef, undef) if $vin; + # $vin is undef on async, so this is a noop + return; + } + my $e = $!; + $end->(); + my $err = $env->{'psgi.errors'}; + $err->print("git archive ($git->{git_dir}): $e\n"); + }; + my $cb = sub { + my $n = $req->{rpipe}->sysread(my $buf, 65536); + return $fail->() unless defined $n; + return $end->() if $n == 0; + if (my $res = delete $req->{res}) { + my $h = [ 'Content-Type', + $FMT_TYPES{$fmt} || 'application/octet-stream', + 'Content-Disposition', + qq(inline; filename="$orig_fn"), + 'ETag', qq("$tree") ]; + $req->{fh} = $res->([200, $h]); + } + my $fh = $req->{fh} or return; + $fh->write($buf); + }; + if (my $async = $env->{'pi-httpd.async'}) { + $req->{rpipe} = $async->($req->{rpipe}, $cb); + sub { $req->{res} = $_[0] } # let Danga::Socket handle the rest. + } else { # synchronous loop for other PSGI servers + $vin = ''; + vec($vin, fileno($req->{rpipe}), 1) = 1; + sub { + $req->{res} = $_[0]; # Plack response callback + while ($req->{rpipe}) { $cb->() } + } + } +} + +1; |