diff options
Diffstat (limited to 'lib')
38 files changed, 3613 insertions, 227 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index f6275cdd..f2c40b13 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -167,4 +167,14 @@ sub _fill { $self->{-by_name}->{$name} = $rv; } +sub try_cat { + my ($path) = @_; + my $rv = ''; + if (open(my $fh, '<', $path)) { + local $/; + $rv = <$fh>; + } + $rv; +} + 1; diff --git a/lib/PublicInbox/GetlineBody.pm b/lib/PublicInbox/GetlineBody.pm index 5f327828..ccc66e48 100644 --- a/lib/PublicInbox/GetlineBody.pm +++ b/lib/PublicInbox/GetlineBody.pm @@ -9,8 +9,13 @@ use strict; use warnings; sub new { - my ($class, $rpipe, $end, $buf) = @_; - bless { rpipe => $rpipe, end => $end, buf => $buf }, $class; + my ($class, $rpipe, $end, $buf, $filter) = @_; + bless { + rpipe => $rpipe, + end => $end, + buf => $buf, + filter => $filter || 0, + }, $class; } # close should always be called after getline returns undef, @@ -20,8 +25,13 @@ sub DESTROY { $_[0]->close } sub getline { my ($self) = @_; + my $filter = $self->{filter}; + return if $filter == -1; # last call was EOF + my $buf = delete $self->{buf}; # initial buffer - defined $buf ? $buf : $self->{rpipe}->getline; + $buf = $self->{rpipe}->getline unless defined $buf; + $self->{filter} = -1 unless defined $buf; # set EOF for next call + $filter ? 
$filter->($buf) : $buf; } sub close { diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 59c27470..caca3b09 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -12,12 +12,49 @@ use warnings; use POSIX qw(dup2); require IO::Handle; use PublicInbox::Spawn qw(spawn popen_rd); +use Fcntl qw(:seek); +my $have_async = eval { + require PublicInbox::EvCleanup; + require PublicInbox::GitAsync; +}; + +# Documentation/SubmittingPatches recommends 12 (Linux v4.4) +my $abbrev = `git config core.abbrev` || 12; + +sub abbrev { "--abbrev=$abbrev" } sub new { my ($class, $git_dir) = @_; bless { git_dir => $git_dir }, $class } +sub err_begin ($) { + my $err = $_[0]->{err}; + unless ($err) { + open($err, '+>', undef); + $_[0]->{err} = $err; + } + sysseek($err, 0, SEEK_SET) or die "sysseek failed: $!"; + truncate($err, 0) or die "truncate failed: $!"; + my $ret = fileno($err); + defined $ret or die "fileno failed: $!"; + $ret; +} + +sub err ($) { + my $err = $_[0]->{err} or return ''; + sysseek($err, 0, SEEK_SET) or die "sysseek failed: $!"; + defined(sysread($err, my $buf, -s $err)) or die "sysread failed: $!"; + sysseek($err, 0, SEEK_SET) or die "sysseek failed: $!"; + truncate($err, 0) or die "truncate failed: $!"; + $buf; +} + +sub cmd { + my $self = shift; + [ 'git', "--git-dir=$self->{git_dir}", @_ ]; +} + sub _bidi_pipe { my ($self, $batch, $in, $out, $pid) = @_; return if $self->{$pid}; @@ -26,9 +63,8 @@ sub _bidi_pipe { pipe($in_r, $in_w) or fail($self, "pipe failed: $!"); pipe($out_r, $out_w) or fail($self, "pipe failed: $!"); - my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch); my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) }; - my $p = spawn(\@cmd, undef, $redir); + my $p = spawn(cmd($self, qw(cat-file), $batch), undef, $redir); defined $p or fail($self, "spawn failed: $!"); $self->{$pid} = $p; $out_w->autoflush(1); @@ -36,20 +72,46 @@ sub _bidi_pipe { $self->{$in} = $in_r; } -sub cat_file { - my ($self, $obj, 
$ref) = @_; - - batch_prepare($self); +# legacy synchronous API +sub cat_file_begin { + my ($self, $obj) = @_; + $self->_bidi_pipe(qw(--batch in out pid)); $self->{out}->print($obj, "\n") or fail($self, "write error: $!"); my $in = $self->{in}; local $/ = "\n"; my $head = $in->getline; $head =~ / missing$/ and return undef; - $head =~ /^[0-9a-f]{40} \S+ (\d+)$/ or + $head =~ /^([0-9a-f]{40}) (\S+) (\d+)$/ or fail($self, "Unexpected result from git cat-file: $head"); - my $size = $1; + ($in, $1, $2, $3); +} + +# legacy synchronous API +sub cat_file_finish { + my ($self, $left) = @_; + my $max = 8192; + my $in = $self->{in}; + my $buf; + while ($left > 0) { + my $r = read($in, $buf, $left > $max ? $max : $left); + defined($r) or fail($self, "read failed: $!"); + $r == 0 and fail($self, 'exited unexpectedly'); + $left -= $r; + } + + my $r = read($in, $buf, 1); + defined($r) or fail($self, "read failed: $!"); + fail($self, 'newline missing after blob') if ($r != 1 || $buf ne "\n"); +} + +# legacy synchronous API +sub cat_file { + my ($self, $obj, $ref) = @_; + + my ($in, $hex, $type, $size) = $self->cat_file_begin($obj); + return unless $in; my $ref_type = $ref ? ref($ref) : ''; my $rv; @@ -58,16 +120,8 @@ sub cat_file { my $cb_err; if ($ref_type eq 'CODE') { - $rv = eval { $ref->($in, \$left) }; + $rv = eval { $ref->($in, \$left, $type, $hex) }; $cb_err = $@; - # drain the rest - my $max = 8192; - while ($left > 0) { - my $r = read($in, my $x, $left > $max ? 
$max : $left); - defined($r) or fail($self, "read failed: $!"); - $r == 0 and fail($self, 'exited unexpectedly'); - $left -= $r; - } } else { my $offset = 0; my $buf = ''; @@ -80,10 +134,7 @@ sub cat_file { } $rv = \$buf; } - - my $r = read($in, my $buf, 1); - defined($r) or fail($self, "read failed: $!"); - fail($self, 'newline missing after blob') if ($r != 1 || $buf ne "\n"); + $self->cat_file_finish($left); die $cb_err if $cb_err; $rv; @@ -91,6 +142,7 @@ sub cat_file { sub batch_prepare ($) { _bidi_pipe($_[0], qw(--batch in out pid)) } +# legacy synchronous API sub check { my ($self, $obj) = @_; $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c)); @@ -119,8 +171,16 @@ sub fail { sub popen { my ($self, @cmd) = @_; - @cmd = ('git', "--git-dir=$self->{git_dir}", @cmd); - popen_rd(\@cmd); + my $cmd = cmd($self); + my ($env, $opt); + if (ref $cmd[0]) { + push @$cmd, @{$cmd[0]}; + $env = $cmd[1]; + $opt = $cmd[2]; + } else { + push @$cmd, @cmd; + } + popen_rd($cmd, $env, $opt); } sub qx { @@ -137,10 +197,91 @@ sub cleanup { my ($self) = @_; _destroy($self, qw(in out pid)); _destroy($self, qw(in_c out_c pid_c)); + + if ($have_async) { + my %h = %$self; # yup, copy ourselves + %$self = (); + my $ds_closed; + + # schedule closing with Danga::Socket::close: + foreach (qw(async async_c)) { + my $ds = delete $h{$_} or next; + $ds->close; + $ds_closed = 1; + } + + # can't do waitpid in _destroy() until next tick, + # since D::S defers closing until end of current event loop + $ds_closed and PublicInbox::EvCleanup::next_tick(sub { + _destroy(\%h, qw(in_a out_a pid_a)); + _destroy(\%h, qw(in_ac out_ac pid_ac)); + }); + } } sub DESTROY { cleanup(@_) } +# modern async API +sub check_async_ds ($$$) { + my ($self, $obj, $cb) = @_; + ($self->{async_c} ||= do { + _bidi_pipe($self, qw(--batch-check in_ac out_ac pid_ac)); + PublicInbox::GitAsync->new($self->{in_ac}, $self->{out_ac}, 1); + })->cat_file_async($obj, $cb); +} + +sub cat_async_ds ($$$) { + my ($self, $obj, $cb) = @_; + 
($self->{async} ||= do { + _bidi_pipe($self, qw(--batch in_a out_a pid_a)); + PublicInbox::GitAsync->new($self->{in_a}, $self->{out_a}); + })->cat_file_async($obj, $cb); +} + +sub async_info_compat ($) { + local $/ = "\n"; + chomp(my $line = $_[0]->getline); + [ split(/ /, $line) ]; +} + +sub check_async_compat ($$$) { + my ($self, $obj, $cb) = @_; + $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c)); + $self->{out_c}->print($obj."\n") or fail($self, "write error: $!"); + my $info = async_info_compat($self->{in_c}); + $cb->($info); +} + +sub cat_async_compat ($$$) { + my ($self, $obj, $cb) = @_; + $self->_bidi_pipe(qw(--batch in out pid)); + $self->{out}->print($obj."\n") or fail($self, "write error: $!"); + my $in = $self->{in}; + my $info = async_info_compat($in); + $cb->($info); + return if scalar(@$info) != 3; # missing + my $max = 8192; + my $left = $info->[2]; + my ($buf, $r); + while ($left > 0) { + $r = read($in, $buf, $left > $max ? $max : $left); + return $cb->($r) unless $r; # undef or 0 + $left -= $r; + $cb->(\$buf); + } + $r = read($in, $buf, 1); + defined($r) or fail($self, "read failed: $!"); + fail($self, 'newline missing after blob') if ($r != 1 || $buf ne "\n"); +} + +if ($have_async) { + *check_async = *check_async_ds; + *cat_async = *cat_async_ds; +} else { + *check_async = *check_async_compat; + *cat_async = *cat_async_compat; +} + 1; __END__ =pod diff --git a/lib/PublicInbox/GitAsync.pm b/lib/PublicInbox/GitAsync.pm new file mode 100644 index 00000000..8369978c --- /dev/null +++ b/lib/PublicInbox/GitAsync.pm @@ -0,0 +1,133 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# internal class used by PublicInbox::Git + Danga::Socket +# This parses the output pipe of "git cat-file --batch/--batch-check" +package PublicInbox::GitAsync; +use strict; +use warnings; +use base qw(Danga::Socket); +use fields qw(jobq rbuf wr check); +use PublicInbox::GitAsyncWr; +our 
$MAX = 65536; # Import may bump this in the future + +sub new { + my ($class, $rd, $wr, $check) = @_; + my $self = fields::new($class); + IO::Handle::blocking($rd, 0); + $self->SUPER::new($rd); + $self->{jobq} = []; # [ [ $obj, $cb, $state ], ... ] + my $buf = ''; + $self->{rbuf} = \$buf; + $self->{wr} = PublicInbox::GitAsyncWr->new($wr); + $self->{check} = $check; + $self->watch_read(1); + $self; +} + +sub cat_file_async { + my ($self, $obj, $cb) = @_; + # order matters + push @{$self->{jobq}}, [ $obj, $cb ]; + $self->{wr}->write(\"$obj\n"); +} + +# Returns: an array ref of the info line for --batch-check and --batch, +# which may be: [ $obj, 'missing'] +# Returns undef on error +sub read_info ($) { + my ($self) = @_; + my $rbuf = $self->{rbuf}; + my $rd = $self->{sock}; + + while (1) { + $$rbuf =~ s/\A([^\n]+)\n//s and return [ split(/ /, $1) ]; + + my $r = sysread($rd, $$rbuf, 110, length($$rbuf)); + next if $r; + return $r; + } +} + +sub event_read { + my ($self) = @_; + my $jobq = $self->{jobq}; + my ($cur, $obj, $cb, $info, $left); + my $check = $self->{check}; + my ($rbuf, $rlen, $need, $buf); +take_job: + $cur = shift @$jobq or die 'BUG: empty job queue in '.__PACKAGE__; + ($obj, $cb, $info, $left) = @$cur; + if (!$info) { + $info = read_info($self); + if (!defined $info && ($!{EAGAIN} || $!{EINTR})) { + return unshift(@$jobq, $cur) + } + $cb->($info); # $info may 0 (EOF, or undef, $cb will see $!) + return $self->close unless $info; + if ($check || (scalar(@$info) != 3)) { + # do not monopolize the event loop if we're drained: + return if ${$self->{rbuf}} eq ''; + goto take_job; + } + $cur->[2] = $info; + my $len = $info->[2]; + $left = \$len; + $cur->[3] = $left; # onto reading body... 
+ } + ref($left) or die 'BUG: $left not ref in '.__PACKAGE__; + + $rbuf = $self->{rbuf}; + $rlen = length($$rbuf); + $need = $$left + 1; # +1 for trailing LF + $buf = ''; + + if ($rlen == $need) { +final_hunk: + $self->{rbuf} = \$buf; + $$left = undef; + my $lf = chop $$rbuf; + $lf eq "\n" or die "BUG: missing LF (got $lf)"; + $cb->($rbuf); + + return if $buf eq ''; + goto take_job; + } elsif ($rlen < $need) { + my $all = $need - $rlen; + my $n = $all > $MAX ? $MAX : $all; + my $r = sysread($self->{sock}, $$rbuf, $n, $rlen); + if ($r) { + goto final_hunk if $r == $all; + + # more to read later... + $$left -= $r; + $self->{rbuf} = \$buf; + $cb->($rbuf); + + # don't monopolize the event loop + return unshift(@$jobq, $cur); + } elsif (!defined $r) { + return unshift(@$jobq, $cur) if $!{EAGAIN} || $!{EINTR}; + } + $cb->($r); # $cb should handle 0 and undef (and see $!) + $self->close; # FAIL... + } else { # too much data in rbuf + $buf = substr($$rbuf, $need, $rlen - $need); + $$rbuf = substr($$rbuf, 0, $need); + goto final_hunk; + } +} + +sub close { + my $self = shift; + my $jobq = $self->{jobq}; + $self->{jobq} = []; + $_->[1]->(0) for @$jobq; + $self->{wr}->close; + $self->SUPER::close(@_); +} + +sub event_hup { $_[0]->close } +sub event_err { $_[0]->close } + +1; diff --git a/lib/PublicInbox/GitAsyncWr.pm b/lib/PublicInbox/GitAsyncWr.pm new file mode 100644 index 00000000..c22f2fcc --- /dev/null +++ b/lib/PublicInbox/GitAsyncWr.pm @@ -0,0 +1,23 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# internal class used by PublicInbox::Git + Danga::Socket +# This writes to the input pipe of "git cat-file --batch/--batch-check" +package PublicInbox::GitAsyncWr; +use strict; +use warnings; +use base qw(Danga::Socket); + +sub new { + my ($class, $io) = @_; + my $self = fields::new($class); + IO::Handle::blocking($io, 0); + $self->SUPER::new($io); +} + +# we only care about write + 
event_write + +sub event_hup { $_[0]->close } +sub event_err { $_[0]->close } + +1; diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index 1fa5e30e..d02b0c0e 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -55,8 +55,8 @@ sub serve { $path =~ /\Agit-\w+-pack\z/) { my $ok = serve_smart($env, $git, $path); return $ok if $ok; + # fall through to dumb HTTP... } - serve_dumb($env, $git, $path); } @@ -200,69 +200,15 @@ sub serve_smart { $env{$name} = $val if defined $val; } my $limiter = $git->{-httpbackend_limiter} || $default_limiter; - my $git_dir = $git->{git_dir}; $env{GIT_HTTP_EXPORT_ALL} = '1'; - $env{PATH_TRANSLATED} = "$git_dir/$path"; + $env{PATH_TRANSLATED} = "$git->{git_dir}/$path"; my $rdr = { 0 => fileno($in) }; my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr); - my ($fh, $rpipe); - my $end = sub { - if (my $err = $qsp->finish) { - err($env, "git http-backend ($git_dir): $err"); - } - $fh->close if $fh; # async-only - }; - - # Danga::Socket users, we queue up the read_enable callback to - # fire after pending writes are complete: - my $buf = ''; - my $rd_hdr = sub { - my $r = sysread($rpipe, $buf, 1024, length($buf)); - return if !defined($r) && ($!{EINTR} || $!{EAGAIN}); - return r(500, 'http-backend error') unless $r; - $r = parse_cgi_headers(\$buf) or return; # incomplete headers + $qsp->psgi_return($env, $limiter, sub { + my ($r, $bref) = @_; + $r = parse_cgi_headers($bref) or return; # incomplete headers $r->[0] == 403 ? 
serve_dumb($env, $git, $path) : $r; - }; - my $res; - my $async = $env->{'pi-httpd.async'}; # XXX unstable API - my $cb = sub { - my $r = $rd_hdr->() or return; - $rd_hdr = undef; - if (scalar(@$r) == 3) { # error: - if ($async) { - $async->close; # calls rpipe->close - } else { - $rpipe->close; - $end->(); - } - $res->($r); - } elsif ($async) { - $fh = $res->($r); - $async->async_pass($env->{'psgix.io'}, $fh, \$buf); - } else { # for synchronous PSGI servers - require PublicInbox::GetlineBody; - $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end, - $buf); - $res->($r); - } - }; - sub { - ($res) = @_; - - # hopefully this doesn't break any middlewares, - # holding the input here is a waste of FDs and memory - $env->{'psgi.input'} = undef; - - $qsp->start($limiter, sub { # may run later, much later... - ($rpipe) = @_; - $in = undef; - if ($async) { - $async = $async->($rpipe, $cb, $end); - } else { # generic PSGI - $cb->() while $rd_hdr; - } - }); - }; + }); } sub input_to_file { diff --git a/lib/PublicInbox/GitIdx.pm b/lib/PublicInbox/GitIdx.pm new file mode 100644 index 00000000..919672a9 --- /dev/null +++ b/lib/PublicInbox/GitIdx.pm @@ -0,0 +1,67 @@ +# Copyright (C) 2017 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +package PublicInbox::GitIdx; +use strict; +use warnings; +use base qw(Exporter); +our @EXPORT = qw(git_umask_for with_umask); +use PublicInbox::Git; +use constant { + PERM_UMASK => 0, + OLD_PERM_GROUP => 1, + OLD_PERM_EVERYBODY => 2, + PERM_GROUP => 0660, + PERM_EVERYBODY => 0664, +}; + +sub _git_config_perm ($) { + my ($git) = @_; + my @cmd = qw(config core.sharedRepository); + $git = PublicInbox::Git->new($git) unless ref $git; + my $perm = $git->qx(@cmd); + chomp $perm if defined $perm; + return PERM_GROUP if (!defined($perm) || $perm eq ''); + return PERM_UMASK if ($perm eq 'umask'); + return PERM_GROUP if ($perm eq 'group'); + if ($perm =~ /\A(?:all|world|everybody)\z/) { + return 
PERM_EVERYBODY; + } + return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/); + return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/); + + my $i = oct($perm); + return PERM_UMASK if ($i == PERM_UMASK); + return PERM_GROUP if ($i == OLD_PERM_GROUP); + return PERM_EVERYBODY if ($i == OLD_PERM_EVERYBODY); + + if (($i & 0600) != 0600) { + die "core.sharedRepository mode invalid: ". + sprintf('%.3o', $i) . "\nOwner must have permissions\n"; + } + ($i & 0666); +} + +sub git_umask_for ($) { + my ($git) = @_; + my $perm = _git_config_perm($git); + my $rv = $perm; + return umask if $rv == 0; + + # set +x bit if +r or +w were set + $rv |= 0100 if ($rv & 0600); + $rv |= 0010 if ($rv & 0060); + $rv |= 0001 if ($rv & 0006); + (~$rv & 0777); +} + +sub with_umask ($$) { + my ($umask, $cb) = @_; + my $old = umask $umask; + my $rv = eval { $cb->() }; + my $err = $@; + umask $old; + die $err if $@; + $rv; +} + +1; diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index 3530f8ba..8ba41ba5 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -241,6 +241,7 @@ sub response_done_cb ($$) { sub { my $env = $self->{env}; $self->{env} = undef; + %$env = () if $env; # prevent circular references $self->write("0\r\n\r\n") if $alive == 2; $self->write(sub{$alive ? 
next_request($self) : $self->close}); } @@ -472,7 +473,7 @@ sub close { my $self = shift; my $forward = $self->{forward}; my $env = $self->{env}; - delete $env->{'psgix.io'} if $env; # prevent circular references + %$env = () if $env; # prevent circular references $self->{pull} = $self->{forward} = $self->{env} = undef; if ($forward) { eval { $forward->close }; diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm index 54b62451..71175692 100644 --- a/lib/PublicInbox/HTTPD/Async.pm +++ b/lib/PublicInbox/HTTPD/Async.pm @@ -23,6 +23,7 @@ sub new { $self; } +# fires after pending writes are complete: sub restart_read_cb ($) { my ($self) = @_; sub { $self->watch_read(1) } @@ -35,14 +36,16 @@ sub main_cb ($$$) { my $r = sysread($self->{sock}, $$bref, 8192); if ($r) { $fh->write($$bref); - return if $http->{closed}; - if ($http->{write_buf_size}) { - $self->watch_read(0); - $http->write(restart_read_cb($self)); + unless ($http->{closed}) { # Danga::Socket sets this + if ($http->{write_buf_size}) { + $self->watch_read(0); + $http->write(restart_read_cb($self)); + } + # stay in watch_read, but let other clients + # get some work done, too. + return; } - # stay in watch_read, but let other clients - # get some work done, too. - return; + # fall through to close below... 
} elsif (!defined $r) { return if $!{EAGAIN} || $!{EINTR}; } @@ -66,7 +69,6 @@ sub async_pass { sub event_read { $_[0]->{cb}->(@_) } sub event_hup { $_[0]->{cb}->(@_) } sub event_err { $_[0]->{cb}->(@_) } -sub sysread { shift->{sock}->sysread(@_) } sub close { my $self = shift; diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 77acecda..15b5fd3e 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -8,16 +8,28 @@ use strict; use warnings; use Encode qw(find_encoding); use PublicInbox::MID qw/mid_clean mid_escape/; +use URI::Escape qw(uri_escape_utf8); use base qw/Exporter/; -our @EXPORT_OK = qw/ascii_html/; +our @EXPORT_OK = qw/ascii_html utf8_html to_attr from_attr/; # for user-generated content (UGC) which may have excessively long lines # and screw up rendering on some browsers. This is the only CSS style # feature we use. use constant STYLE => '<style>pre{white-space:pre-wrap}</style>'; +my $enc_utf8 = find_encoding('UTF-8'); my $enc_ascii = find_encoding('us-ascii'); +sub utf8 { + my ($class, $raw, $href) = @_; + + $raw = $enc_utf8->decode($raw); + bless { + raw => $raw, + href => defined $href ? $href : $raw, + }, $class; +} + sub new { my ($class, $raw, $href) = @_; @@ -71,7 +83,19 @@ sub ascii_html { $enc_ascii->encode($s, Encode::HTMLCREF); } +sub utf8_html { + my ($raw) = @_; + ascii_html($enc_utf8->decode($raw)); +} + sub as_html { ascii_html($_[0]->{raw}) } +sub as_href { ascii_html(uri_escape_utf8($_[0]->{href})) } + +sub as_path { + my $p = uri_escape_utf8($_[0]->{href}); + $p =~ s!%2[fF]!/!g; + ascii_html($p); +} sub raw { if (defined $_[1]) { @@ -86,4 +110,35 @@ sub prurl { index($u, '//') == 0 ? 
"$env->{'psgi.url_scheme'}:$u" : $u; } +# convert a filename (or any string) to HTML attribute + +my %ESCAPES = map { chr($_) => sprintf('::%02x', $_) } (0..255); +$ESCAPES{'/'} = ':'; # common + +sub to_attr ($) { + my ($str) = @_; + + # git would never do this to us: + die "invalid filename: $str" if index($str, '//') >= 0; + + my $first = ''; + if ($str =~ s/\A([^A-Ya-z])//ms) { # start with a letter + $first = sprintf('Z%02x', ord($1)); + } + $str =~ s/([^A-Za-z0-9_\.\-])/$ESCAPES{$1}/egms; + $first . $str; +} + +# reverse the result of to_attr +sub from_attr ($) { + my ($str) = @_; + my $first = ''; + if ($str =~ s/\AZ([a-f0-9]{2})//ms) { + $first = chr(hex($1)); + } + $str =~ s!::([a-f0-9]{2})!chr(hex($1))!egms; + $str =~ tr!:!/!; + $first . $str; +} + 1; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index a0d69f18..05d04530 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -6,6 +6,7 @@ package PublicInbox::Inbox; use strict; use warnings; use PublicInbox::Git; +use PublicInbox::Config; use PublicInbox::MID qw(mid2path); use Devel::Peek qw(SvREFCNT); @@ -102,21 +103,11 @@ sub search { }; } -sub try_cat { - my ($path) = @_; - my $rv = ''; - if (open(my $fh, '<', $path)) { - local $/; - $rv = <$fh>; - } - $rv; -} - sub description { my ($self) = @_; my $desc = $self->{description}; return $desc if defined $desc; - $desc = try_cat("$self->{mainrepo}/description"); + $desc = PublicInbox::Config::try_cat("$self->{mainrepo}/description"); local $/ = "\n"; chomp $desc; $desc =~ s/\s+/ /smg; @@ -128,7 +119,7 @@ sub cloneurl { my ($self) = @_; my $url = $self->{cloneurl}; return $url if $url; - $url = try_cat("$self->{mainrepo}/cloneurl"); + $url = PublicInbox::Config::try_cat("$self->{mainrepo}/cloneurl"); my @url = split(/\s+/s, $url); local $/ = "\n"; chomp @url; @@ -211,16 +202,16 @@ sub nntp_usable { $ret; } -sub msg_by_path ($$;$) { +sub msg_by_path ($$) { my ($self, $path, $ref) = @_; # TODO: allow other refs: - my $str 
= git($self)->cat_file('HEAD:'.$path, $ref); + my $str = git($self)->cat_file('HEAD:'.$path); $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str; $str; } -sub msg_by_smsg ($$;$) { - my ($self, $smsg, $ref) = @_; +sub msg_by_smsg ($$) { + my ($self, $smsg) = @_; return unless defined $smsg; # ghost @@ -229,7 +220,7 @@ sub msg_by_smsg ($$;$) { defined(my $blob = $smsg->{blob}) or return msg_by_mid($self, $smsg->mid); - my $str = git($self)->cat_file($blob, $ref); + my $str = git($self)->cat_file($blob); $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str; $str; } @@ -239,9 +230,9 @@ sub path_check { git($self)->check('HEAD:'.$path); } -sub msg_by_mid ($$;$) { - my ($self, $mid, $ref) = @_; - msg_by_path($self, mid2path($mid), $ref); +sub msg_by_mid ($$) { + my ($self, $mid) = @_; + msg_by_path($self, mid2path($mid)); } 1; diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index 4950da25..73022656 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -1,7 +1,9 @@ # Copyright (C) 2016 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# Limits the number of processes spawned +# Generic process management framework to limits the number of +# processes spawned in public-inbox-httpd, but has generic fallbacks +# to work on any PSGI server. 
# This does not depend on Danga::Socket or any other external # scheduling mechanism, you just need to call start and finish # appropriately @@ -9,6 +11,8 @@ package PublicInbox::Qspawn; use strict; use warnings; use PublicInbox::Spawn qw(popen_rd); +require Plack::Util; +my $def_limiter; sub new ($$$;) { my ($class, $cmd, $env, $opt) = @_; @@ -59,6 +63,119 @@ sub start { } } +sub _psgi_finish ($$) { + my ($self, $env) = @_; + my $err = $self->finish; + if ($err && !$env->{'qspawn.quiet'}) { + $err = join(' ', @{$self->{args}->[0]}).": $err\n"; + $env->{'psgi.errors'}->print($err); + } +} + +sub psgi_qx { + my ($self, $env, $limiter, $qx_cb) = @_; + my $qx = PublicInbox::Qspawn::Qx->new; + my $end = sub { + _psgi_finish($self, $env); + eval { $qx_cb->($qx) }; + $qx = undef; + }; + my $rpipe; + my $async = $env->{'pi-httpd.async'}; + my $cb = sub { + my $r = sysread($rpipe, my $buf, 8192); + if ($async) { + $async->async_pass($env->{'psgix.io'}, $qx, \$buf); + } elsif (defined $r) { + $r ? $qx->write($buf) : $end->(); + } else { + return if $!{EAGAIN} || $!{EINTR}; # loop again + $end->(); + } + }; + $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); + $self->start($limiter, sub { # may run later, much later... 
+ ($rpipe) = @_; + if ($async) { + # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end) + $async = $async->($rpipe, $cb, $end); + } else { # generic PSGI + $cb->() while $qx; + } + }); +} + +# create a filter for "push"-based streaming PSGI writes used by HTTPD::Async +sub filter_fh ($$) { + my ($fh, $filter) = @_; + Plack::Util::inline_object( + close => sub { + $fh->write($filter->(undef)); + $fh->close; + }, + write => sub { + $fh->write($filter->($_[0])); + }); +} + +sub psgi_return { + my ($self, $env, $limiter, $parse_hdr) = @_; + my ($fh, $rpipe); + my $end = sub { + _psgi_finish($self, $env); + $fh->close if $fh; # async-only + }; + + my $buf = ''; + my $rd_hdr = sub { + my $r = sysread($rpipe, $buf, 1024, length($buf)); + return if !defined($r) && ($!{EINTR} || $!{EAGAIN}); + $parse_hdr->($r, \$buf); + }; + my $res = delete $env->{'qspawn.response'}; + my $async = $env->{'pi-httpd.async'}; + my $cb = sub { + my $r = $rd_hdr->() or return; + $rd_hdr = undef; + my $filter = delete $env->{'qspawn.filter'}; + if (scalar(@$r) == 3) { # error + if ($async) { + $async->close; # calls rpipe->close and $end + } else { + $rpipe->close; + $end->(); + } + $res->($r); + } elsif ($async) { + $fh = $res->($r); # scalar @$r == 2 + $fh = filter_fh($fh, $filter) if $filter; + $async->async_pass($env->{'psgix.io'}, $fh, \$buf); + } else { # for synchronous PSGI servers + require PublicInbox::GetlineBody; + $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end, + $buf, $filter); + $res->($r); + } + }; + $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); + my $start_cb = sub { # may run later, much later... 
+ ($rpipe) = @_; + if ($async) { + # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end) + $async = $async->($rpipe, $cb, $end); + } else { # generic PSGI + $cb->() while $rd_hdr; + } + }; + + return $self->start($limiter, $start_cb) if $res; + + sub { + ($res) = @_; + $self->start($limiter, $start_cb); + }; +} + package PublicInbox::Qspawn::Limiter; use strict; use warnings; @@ -73,4 +190,21 @@ sub new { }, $class; } +# captures everything into a buffer and executes a callback when done +package PublicInbox::Qspawn::Qx; +use strict; +use warnings; + +sub new { + my ($class) = @_; + my $buf = ''; + bless \$buf, $class; +} + +# called by PublicInbox::HTTPD::Async ($fh->write) +sub write { + ${$_[0]} .= $_[1]; + undef; +} + 1; diff --git a/lib/PublicInbox/Repo.pm b/lib/PublicInbox/Repo.pm new file mode 100644 index 00000000..e53d6975 --- /dev/null +++ b/lib/PublicInbox/Repo.pm @@ -0,0 +1,47 @@ +# Copyright (C) 2017 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# Represents a code repository, analoguos to the PublicInbox::Inbox +# class for represpenting an inbox git repository. 
+package PublicInbox::Repo; +use strict; +use warnings; +use PublicInbox::Config; + +sub new { + my ($class, $opts) = @_; + bless $opts, $class; +} + +sub description { + my ($self) = @_; + my $desc = $self->{description}; + return $desc if defined $desc; + $desc = PublicInbox::Config::try_cat("$self->{path}/description"); + local $/ = "\n"; + chomp $desc; + $desc =~ s/\s+/ /smg; + $desc = '($GIT_DIR/description missing)' if $desc eq ''; + $self->{description} = $desc; +} + +sub desc_html { + my ($self) = @_; + $self->{desc_html} ||= + PublicInbox::Hval->utf8($self->description)->as_html; +} + +sub cloneurl { + my ($self) = @_; + my $url = $self->{cloneurl}; + return $url if $url; + if ($self->{vcs} eq 'git') { + $url = PublicInbox::Config::try_cat("$self->{path}/cloneurl"); + $url = [ split(/\s+/s, $url) ]; + local $/ = "\n"; + chomp @$url; + } + $self->{cloneurl} = $url; +} + +1; diff --git a/lib/PublicInbox/RepoBase.pm b/lib/PublicInbox/RepoBase.pm new file mode 100644 index 00000000..97f13b25 --- /dev/null +++ b/lib/PublicInbox/RepoBase.pm @@ -0,0 +1,115 @@ +# Copyright (C) 2015 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +package PublicInbox::RepoBase; +use strict; +use warnings; +require PublicInbox::RepoGitQuery; +use PublicInbox::Hval; +our %MIME_TYPE_WHITELIST = ('application/pdf' => 1); + +sub new { bless {}, shift } + +sub call { + my ($self, $cmd, $req) = @_; + my $vcs = $req->{repo_info}->{vcs}; + my $rv = eval { + no strict 'refs'; + my $sub = "call_${vcs}_$cmd"; + $self->$sub($req); + }; + $@ ? 
[ 500, ['Content-Type'=>'text/plain'], [] ] : $rv; +} + +sub mime_load { + my ($self, $file) = @_; + my %rv; + open my $fh, '<', $file or return \%rv; + while (<$fh>) { + next if /^#/; # no comments + my ($type, @ext) = split(/\s+/); + + if (defined $type) { + $rv{$_} = $type foreach @ext; + } + } + \%rv; +} + +# returns undef if missing, so users can scan the blob if needed +sub mime_type_unsafe { + my ($self, $fn) = @_; + $fn =~ /\.([^\.]+)\z/ or return; + my $ext = $1; + my $m = $self->{mime_types} ||= $self->mime_load('/etc/mime.types'); + $m->{$ext}; +} + +sub mime_type { + my ($self, $fn) = @_; + my $ct = $self->mime_type_unsafe($fn); + return unless defined $ct; + + # XSS protection. Assume the browser knows what to do + # with images/audio/video; but don't allow random HTML from + # a repository to be served + ($ct =~ m!\A(?:image|audio|video)/! || $MIME_TYPE_WHITELIST{$ct}) ? + $ct : undef; +} + +# starts an HTML page for Repobrowse in a consistent way +sub html_start { + my ($self, $req, $title_html, $opts) = @_; + my $desc = $req->{repo_info}->desc_html; + my $meta = ''; + + if ($opts) { + my @robots; + foreach (qw(nofollow noindex)) { + push @robots, $_ if $opts->{$_}; + } + $meta = qq(<meta\nname=robots\ncontent=") . + join(',', @robots) . '" />'; + } + + "<html><head><title>$title_html</title>" . + PublicInbox::Hval::STYLE . $meta . + "</head><body><pre><b>$desc</b>"; +} + +sub r { + my ($self, $status, $req, @extra) = @_; + my @h; + + my $body = ''; + if ($status == 301 || $status == 302) { + # The goal is to be able to make redirects like we make + # <a href=> tags with '../' + my $env = $req->{env}; + my $base = PublicInbox::Repobrowse::base_url($env); + my ($redir) = @extra; + if ($redir =~ m!\A\.\./!) { # relative redirect + my @orig = split(m!/+!, $env->{PATH_INFO}); + my @dest = split(m!/+!, $redir); + + while ($dest[0] eq '..') { + pop @orig; + shift @dest; + } + my $end = ''; + $end = pop @dest if $dest[-1] =~ /\A[#\?]/; + $redir = $base . 
join('/', @orig, @dest) . $end; + } else { + $redir = $base . '/' . $redir; + } + push @h, qw(Content-Type text/plain Location), $redir; + + # mainly for curl (no-'-L') users: + $body = "Redirecting to $redir\n"; + } else { + push @h, qw(Content-Type text/plain); + } + + [ $status, \@h, [ $body ] ] +} + +1; diff --git a/lib/PublicInbox/RepoConfig.pm b/lib/PublicInbox/RepoConfig.pm new file mode 100644 index 00000000..e1e2860b --- /dev/null +++ b/lib/PublicInbox/RepoConfig.pm @@ -0,0 +1,78 @@ +# Copyright (C) 2015 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +package PublicInbox::RepoConfig; +use strict; +use warnings; +use PublicInbox::Config; +use PublicInbox::Repo; +require PublicInbox::Hval; + +sub new { + my ($class, $file) = @_; + $file = default_file() unless defined($file); + my $self = bless PublicInbox::Config::git_config_dump($file), $class; + $self->{-cache} = {}; + + # hard disable these with '-' prefix by default: + $self->{'repobrowse.snapshots'} ||= '-tar.bz2 -tar.xz'; + + # for root + $self->{-groups} = { -hidden => [], -none => [] }; + $self; +} + +sub default_file { + my $f = $ENV{REPOBROWSE_CONFIG}; + return $f if defined $f; + PublicInbox::Config::config_dir() . 
'/repobrowse_config'; +} + +# Returns something like: +# { +# path => '/home/git/foo.git', +# publicinbox => '/home/pub/foo-public.git', +# } +sub lookup { + my ($self, $repo_path) = @_; # "git.git" + my $rv; + + $rv = $self->{-cache}->{$repo_path} and return $rv; + + my $path = $self->{"repo.$repo_path.path"}; + (defined $path && -d $path) or return; + $rv->{path} = $path; + $rv->{repo} = $repo_path; + + # snapshots: + my $snap = (split('/', $repo_path))[-1]; + $snap =~ s/\.git\z//; # seems common for git URLs to end in ".git" + $rv->{snapshot_re} = qr/\A\Q$snap\E[-_]/; + $rv->{snapshot_pfx} = $snap; + + foreach my $key (qw(publicinbox vcs readme group snapshots)) { + $rv->{$key} = $self->{"repo.$repo_path.$key"}; + } + unless (defined $rv->{snapshots}) { + $rv->{snapshots} = $self->{'repobrowse.snapshots'} || ''; + } + + my %disabled; + foreach (split(/\s+/, $rv->{snapshots})) { + s/\A-// and $disabled{$_} = 1; + } + $rv->{snapshots_disabled} = \%disabled; + + my $g = $rv->{group}; + defined $g or $g = '-none'; + if (ref($g) eq 'ARRAY') { + push @{$self->{-groups}->{$_} ||= []}, $repo_path foreach @$g; + } else { + push @{$self->{-groups}->{$g} ||= []}, $repo_path; + } + + # of course git is the default VCS + $rv->{vcs} ||= 'git'; + $self->{-cache}->{$repo_path} = PublicInbox::Repo->new($rv); +} + +1; diff --git a/lib/PublicInbox/RepoGit.pm b/lib/PublicInbox/RepoGit.pm new file mode 100644 index 00000000..b44457ca --- /dev/null +++ b/lib/PublicInbox/RepoGit.pm @@ -0,0 +1,69 @@ +# Copyright (C) 2015 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ (https://www.gnu.org/licenses/agpl-3.0.txt) + +# common functions used by other RepoGit* modules +package PublicInbox::RepoGit; +use strict; +use warnings; +use base qw(Exporter); +our @EXPORT_OK = qw(git_unquote git_commit_title git_dec_links); +use PublicInbox::Hval qw(utf8_html); + +my %GIT_ESC = ( + a => "\a", + b => "\b", + f => "\f", + n => "\n", + r => "\r", + t => "\t", + v => "\013", +); + +sub 
git_unquote ($) { + my ($s) = @_; + return $s unless ($s =~ /\A"(.*)"\z/); + $s = $1; + $s =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g; + $s =~ s/\\([0-7]{1,3})/chr(oct($1))/ge; + $s; +} + +# Remove, hilariously slow +sub git_commit_title ($$) { + my ($git, $obj) = @_; # PublicInbox::Git, $sha1hex + my $rv; + eval { + my $buf = $git->cat_file($obj); + ($rv) = ($$buf =~ /\r?\n\r?\n([^\r\n]+)\r?\n?/); + }; + $rv; +} + +# example inputs: "HEAD -> master", "tag: v1.0.0", +sub git_dec_links ($$) { + my ($rel, $D) = @_; + my @l; + foreach (split /, /, $D) { + if (/\A(\S+) -> (\S+)/) { # 'HEAD -> master' + my ($s, $h) = ($1, $2); + $s = utf8_html($s); + $h = PublicInbox::Hval->utf8($h); + my $r = $h->as_href; + $h = $h->as_html; + push @l, qq($s -> <a\nhref="${rel}log?h=$r">$h</a>); + } elsif (s/\Atag: //) { + my $h = PublicInbox::Hval->utf8($_); + my $r = $h->as_href; + $h = $h->as_html; + push @l, qq(<a\nhref="${rel}tag?h=$r"><b>$h</b></a>); + } else { + my $h = PublicInbox::Hval->utf8($_); + my $r = $h->as_href; + $h = $h->as_html; + push @l, qq(<a\nhref="${rel}log?h=$r">$h</a>); + } + } + @l; +} + +1; diff --git a/lib/PublicInbox/RepoGitAtom.pm b/lib/PublicInbox/RepoGitAtom.pm new file mode 100644 index 00000000..6d0caa02 --- /dev/null +++ b/lib/PublicInbox/RepoGitAtom.pm @@ -0,0 +1,170 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# show log as an Atom feed +package PublicInbox::RepoGitAtom; +use strict; +use warnings; +use PublicInbox::Hval qw(utf8_html); +use base qw(PublicInbox::RepoBase); +use PublicInbox::Qspawn; + +use constant DATEFMT => '%Y-%m-%dT%H:%M:%SZ'; +use constant STATES => qw(H ct an ae at s b); +use constant STATE_BODY => (scalar(STATES) - 1); +my $ATOM_FMT = '--pretty=tformat:'. 
+ join('%n', map { "%$_" } STATES).'%x00'; +use POSIX qw(strftime); + +sub repo_root_url { + my ($self, $req) = @_; + my $env = $req->{env}; + my $uri = $env->{REQUEST_URI}; + $uri =~ s/\?.+\z//; # no query string + my @uri = split(m!/+!, $uri); + my @extra = @{$req->{extra}}; + while (@uri && @extra && $uri[-1] eq $extra[-1]) { + pop @uri; + pop @extra; + } + pop @uri if $uri[-1] eq 'atom'; # warn if not equal? + PublicInbox::Repobrowse::base_url($env) . join('/', @uri); +} + +sub flush_hdr ($$$) { + my ($dst, $hdr, $url) = @_; + $$dst .= '<entry><title>'; + $$dst .= utf8_html($hdr->{'s'}); # commit subject + $$dst .= '</title><updated>'; + $$dst .= strftime(DATEFMT, gmtime($hdr->{ct})); + $$dst .= '</updated><author><name>'; + $$dst .= utf8_html($hdr->{an}); + $$dst .= '</name><email>'; + $$dst .= utf8_html($hdr->{ae}); + $$dst .= '</email></author><published>'; + $$dst .= strftime(DATEFMT, gmtime($hdr->{at})); + $$dst .= '</published>'; + $$dst .= qq(<link\nrel="alternate"\ntype="text/html"\nhref="); + $$dst .= $url; + $$dst .= '/commit?id='; + + my $H = $hdr->{H}; + $$dst .= $H; + $$dst .= qq("\n/><id>); + $$dst .= $H; + $$dst .= qq(</id>); + + $$dst .= qq(<content\ntype="xhtml"><div\nxmlns="); + $$dst .= qq(http://www.w3.org/1999/xhtml">); + $$dst .= qq(<pre\nstyle="white-space:pre-wrap">); + undef +} + +sub git_atom_sed ($$) { + my ($self, $req) = @_; + my $buf = ''; + my $state = 0; + my $rel = $req->{relcmd}; + my $repo_info = $req->{repo_info}; + my $title = join('/', $repo_info->{repo}, @{$req->{extra}}); + $title = utf8_html("$title, branch $req->{q}->{h}"); + my $url = repo_root_url($self, $req); + my $hdr = {}; + my $subtitle = $repo_info->desc_html; + $req->{axml} = qq(<?xml version="1.0"?>\n) . + qq(<feed\nxmlns="http://www.w3.org/2005/Atom">) . + qq(<title>$title</title>) . + qq(<subtitle>$subtitle</subtitle>) . 
+ qq(<link\nrel="alternate"\ntype="text/html"\nhref="$url"\n/>); + my ($plinks, $id, $ai); + my $end = ''; + my $blines; + sub { + my $dst; + # $_[0] == scalar buffer, undef means EOF from "git log" + $dst = delete $req->{axml} || ''; + my @tmp; + if (defined $_[0]) { + $buf .= $_[0]; + @tmp = split(/\n/, $buf, -1); + $buf = @tmp ? pop(@tmp) : ''; + } else { + @tmp = split(/\n/, $buf, -1); + $buf = ''; + $end = '</feed>'; + } + + foreach my $l (@tmp) { + if ($state != STATE_BODY) { + $hdr->{((STATES)[$state])} = $l; + if (++$state == STATE_BODY) { + flush_hdr(\$dst, $hdr, $url); + $hdr = {}; + $blines = 0; + } + next; + } + if ($l eq "\0") { + $dst .= qq(</pre></div></content></entry>); + $state = 0; + } else { + $dst .= "\n" if $blines++; + $dst .= utf8_html($l); + } + } + $dst .= $end; + } +} + +sub git_atom_cb { + my ($self, $req) = @_; + sub { + my ($r) = @_; + my $env = $req->{env}; + if (!defined $r) { + my $git = $req->{repo_info}->{git}; + return [ 400, [ 'Content-Type', 'text/plain' ], + [ $git->err ] ]; + } + $env->{'qspawn.filter'} = git_atom_sed($self, $req); + [ 200, [ 'Content-Type', 'application/atom+xml' ] ]; + } +} + +sub call_git_atom { + my ($self, $req) = @_; + my $repo_info = $req->{repo_info}; + my $max = $repo_info->{max_commit_count} || 10; + $max = int($max); + $max = 50 if $max == 0; + + my $git = $repo_info->{git}; + my $env = $req->{env}; + my $q =$req->{'q'} = PublicInbox::RepoGitQuery->new($env); + my $h = $q->{h}; + my $read_log = sub { + my $cmd = $git->cmd(qw(log --no-notes --no-color + --abbrev-commit), $git->abbrev, + $ATOM_FMT, "-$max", $h, '--'); + my $expath = $req->{expath}; + push @$cmd, $expath if $expath ne ''; + my $rdr = { 2 => $git->err_begin }; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, undef, $rdr); + $qsp->psgi_return($env, undef, git_atom_cb($self, $req)); + }; + + sub { + $env->{'qspawn.response'} = $_[0]; + return $read_log->() if $h ne ''; + + my $cmd = $git->cmd(qw(symbolic-ref --short HEAD)); + my $rdr = 
{ 2 => $git->err_begin }; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, undef, $rdr); + $qsp->psgi_qx($env, undef, sub { + chomp($h = ${$_[0]}); + $read_log->(); + }) + } +} + +1; diff --git a/lib/PublicInbox/RepoGitBlob.pm b/lib/PublicInbox/RepoGitBlob.pm new file mode 100644 index 00000000..586b4acc --- /dev/null +++ b/lib/PublicInbox/RepoGitBlob.pm @@ -0,0 +1,77 @@ +# Copyright (C) 2015-2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# Show a blob as-is +package PublicInbox::RepoGitBlob; +use strict; +use warnings; +use base qw(PublicInbox::RepoBase); +use base qw(Exporter); +our @EXPORT = qw(git_blob_mime_type git_blob_stream_response); + +sub call_git_blob { + my ($self, $req) = @_; + my $git = $req->{repo_info}->{git}; + my $q = PublicInbox::RepoGitQuery->new($req->{env}); + my $id = $q->{id}; + $id eq '' and $id = 'HEAD'; + $id .= ":$req->{expath}"; + + my ($cat, $hex, $type, $size) = $git->cat_file_begin($id); + return unless defined $cat; + + my ($r, $buf); + my $left = $size; + if ($type eq 'blob') { + $type = git_blob_mime_type($self, $req, $cat, \$buf, \$left); + } elsif ($type eq 'commit' || $type eq 'tag') { + $type = 'text/plain; charset=UTF-8'; + } else { + $type = 'application/octet-stream'; + } + git_blob_stream_response($git, $cat, $size, $type, $buf, $left); +} + +sub git_blob_mime_type { + my ($self, $req, $cat, $buf, $left) = @_; + my $base = $req->{extra}->[-1]; + my $type = $self->mime_type($base) if defined $base; + return $type if $type; + + my $to_read = 8000; # git uses this size to detect binary files + $to_read = $$left if $to_read > $$left; + my $r = read($cat, $$buf, $to_read); + if (!defined $r || $r <= 0) { + my $git = $req->{repo_info}->{git}; + $git->cat_file_finish($$left); + return; + } + $$left -= $r; + (index($buf, "\0") < 0) ? 
'text/plain; charset=UTF-8' + : 'application/octet-stream'; +} + +sub git_blob_stream_response { + my ($git, $cat, $size, $type, $buf, $left) = @_; + + sub { + my ($res) = @_; + my $to_read = 8192; + eval { + my $fh = $res->([ 200, ['Content-Length' => $size, + 'Content-Type' => $type]]); + $fh->write($buf) if defined $buf; + while ($left > 0) { + $to_read = $left if $to_read > $left; + my $r = read($cat, $buf, $to_read); + last if (!defined $r || $r <= 0); + $left -= $r; + $fh->write($buf); + } + $fh->close; + }; + $git->cat_file_finish($left); + } +} + +1; diff --git a/lib/PublicInbox/RepoGitCommit.pm b/lib/PublicInbox/RepoGitCommit.pm new file mode 100644 index 00000000..e98c3c18 --- /dev/null +++ b/lib/PublicInbox/RepoGitCommit.pm @@ -0,0 +1,201 @@ +# Copyright (C) 2015 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# shows the /commit/ endpoint for git repositories +# +# anchors used: +# D - diffstat +# P - parents +# ...and various filenames from to_attr +# The 'D' and 'P' anchors may conflict with odd filenames, but we won't +# punish the common case with extra bytes if somebody uses 'D' or 'P' +# in filenames. 
+ +package PublicInbox::RepoGitCommit; +use strict; +use warnings; +use base qw(PublicInbox::RepoBase); +use PublicInbox::Hval qw(utf8_html to_attr); +use PublicInbox::RepoGit qw(git_unquote git_commit_title); +use PublicInbox::RepoGitDiffCommon; +use PublicInbox::Qspawn; + +use constant GIT_FMT => '--pretty=format:'.join('%n', + '%H', '%h', '%s', '%an <%ae>', '%ai', '%cn <%ce>', '%ci', + '%t', '%p', '%D', '%b%x00'); + +use constant CC_EMPTY => " This is a merge, and the combined diff is empty.\n"; +use constant CC_MERGE => " This is a merge, showing combined diff:\n\n"; + +sub commit_header { + my ($self, $req) = @_; + my ($H, $h, $s, $au, $ad, $cu, $cd, $t, $p, $D, $rest) = + split("\n", $req->{dbuf}, 11); + $s = utf8_html($s); + $au = utf8_html($au); + $cu = utf8_html($cu); + my @p = split(' ', $p); + + my $rel = $req->{relcmd}; + my $q = $req->{'q'}; + my $qs = $req->{qs} = $q->qs(id => $h); + my $x = $self->html_start($req, $s) . "\n" . + qq( commit $H (<a\nhref="${rel}patch$qs">patch</a>)\n) . 
+ qq( tree <a\nrel=nofollow\nhref="${rel}tree?id=$h">$t</a>); + + my $git = $req->{repo_info}->{git}; + # extra show path information, if any + my $extra = $req->{extra}; + my $path = ''; + if (@$extra) { + my @t; + my $ep; + $x .= ' -- '; + $x .= join('/', map { + push @t, $_; + my $e = PublicInbox::Hval->utf8($_, join('/', @t)); + $ep = $e->as_path; + my $eh = $e->as_html; + $ep = "${rel}tree/$ep?id=$h"; + qq(<a\nrel=nofollow\nhref="$ep">$eh</a>); + } @$extra); + $path = "/$ep"; + } + + $x .= "\n author $au\t$ad\ncommitter $cu\t$cd\n"; + my $np = scalar @p; + if ($np == 1) { + my $p = $p[0]; + $x .= git_parent_line(' parent', $p, $q, $git, $rel, $path); + } elsif ($np > 1) { + $req->{mhelp} = CC_MERGE; + my @common = ($q, $git, $rel, $path); + my @t = @p; + my $p = shift @t; + $x .= git_parent_line(' parents', $p, @common); + foreach $p (@t) { + $x .= git_parent_line(' ', $p, @common); + } + } + $x .= "\n<b>"; + $x .= $s; + $x .= "</b>\n\n"; + my $bx00; + + # FIXME: deal with excessively long commit message bodies + ($bx00, $req->{dbuf}) = split("\0", $rest, 2); + $req->{anchors} = {}; + $req->{h} = $h; + $req->{p} = \@p; + $x .= utf8_html($bx00) . 
"<a\nid=D>---</a>\n"; +} + +sub git_commit_sed ($$) { + my ($self, $req) = @_; + git_diff_sed_init($req); + my $dbuf = \($req->{dbuf}); + + # this filters for $fh->write or $body->getline (see Qspawn) + sub { + my $dst = ''; + if (defined $_[0]) { # $_[0] == scalar buffer + $$dbuf .= $_[0]; + if ($req->{dstate} == DSTATE_INIT) { + return $dst if index($$dbuf, "\0") < 0; + $req->{dstate} = DSTATE_STAT; + $dst .= commit_header($self, $req); + } + git_diff_sed_run(\$dst, $req); + } else { # undef means EOF from "git show", flush the last bit + git_diff_sed_close(\$dst, $req); + $dst .= CC_EMPTY if delete $req->{mhelp}; + show_unchanged(\$dst, $req); + $dst .= '</pre></body></html>'; + } + $dst; + } +} + +sub call_git_commit { # RepoBase calls this + my ($self, $req) = @_; + my $env = $req->{env}; + my $q = PublicInbox::RepoGitQuery->new($env); + my $id = $q->{id}; + $id eq '' and $id = 'HEAD'; + + my $expath = $req->{expath}; + if ($expath ne '') { + my $relup = join('', map { '../' } @{$req->{extra}}); + my $qs = $q->qs; + return $self->r(301, $req, "$relup$qs#".to_attr($expath)); + } + + my $git = $req->{repo_info}->{git}; + my $cmd = $git->cmd(qw(show -z --numstat -p --encoding=UTF-8 + --no-notes --no-color -c), + $git->abbrev, GIT_FMT, $id, '--'); + my $rdr = { 2 => $git->err_begin }; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr); + $req->{'q'} = $q; + $env->{'qspawn.quiet'} = 1; + $qsp->psgi_return($env, undef, sub { # parse header + my ($r, $bref) = @_; + if (!defined $r) { + my $errmsg = $git->err; + [ 500, [ 'Content-Type', 'text/html' ], [ $errmsg ] ]; + } elsif ($r == 0) { + git_commit_404($req); + } else { + $env->{'qspawn.filter'} = git_commit_sed($self, $req); + [ 200, [ 'Content-Type', 'text/html' ] ]; + } + }); +} + +sub git_commit_404 { + my ($req) = @_; + my $x = 'Missing commit or path'; + my $pfx = "$req->{relcmd}commit"; + + my $try = 'try'; + $x = "<html><head><title>$x</title></head><body><pre><b>$x</b>\n\n"; + my $qs = 
$req->{'q'}->qs(id => ''); + $x .= "<a\nhref=\"$pfx$qs\">$try the latest commit in HEAD</a>\n"; + $x .= '</pre></body>'; + + [404, ['Content-Type'=>'text/html'], [ $x ]]; +} + +# FIXME: horrifically expensive... +sub git_parent_line { + my ($pfx, $p, $q, $git, $rel, $path) = @_; + my $qs = $q->qs(id => $p); + my $t = git_commit_title($git, $p); + $t = defined $t ? utf8_html($t) : ''; + $pfx . qq( <a\nid=P\nhref="${rel}commit$path$qs">$p</a> $t\n); +} + +# do not break anchor links if the combined diff doesn't show changes: +sub show_unchanged { + my ($dst, $req) = @_; + + my @unchanged = sort keys %{$req->{anchors}}; + return unless @unchanged; + my $anchors = $req->{anchors}; + $$dst .= "\n There are uninteresting changes from this merge.\n" . + qq( See the <a\nhref="#P">parents</a>, ) . + "or view final state(s) below:\n\n"; + my $rel = $req->{relcmd}; + my $qs = $req->{qs}; + foreach my $anchor (@unchanged) { + my $fn = $anchors->{$anchor}; + my $p = PublicInbox::Hval->utf8(git_unquote($fn)); + $p = $p->as_path; + $fn = utf8_html($fn); + $$dst .= qq(\t<a\nrel=nofollow); + $$dst .= qq(\nid="$anchor"\nhref="${rel}tree/$p$qs">); + $$dst .= "$fn</a>\n"; + } +} + +1; diff --git a/lib/PublicInbox/RepoGitDiff.pm b/lib/PublicInbox/RepoGitDiff.pm new file mode 100644 index 00000000..bb71e738 --- /dev/null +++ b/lib/PublicInbox/RepoGitDiff.pm @@ -0,0 +1,73 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# shows the /diff endpoint for git repositories for cgit compatibility +# usage: /repo.git/diff?id=COMMIT_ID&id2=COMMIT_ID2 +# +# We probably will not link to this outright because it's expensive, +# but exists to preserve URL compatibility with cgit. 
+package PublicInbox::RepoGitDiff; +use strict; +use warnings; +use base qw(PublicInbox::RepoBase); +use PublicInbox::Hval qw(utf8_html); +use PublicInbox::RepoGitDiffCommon; +use PublicInbox::Qspawn; + +sub git_diff_sed ($$) { + my ($self, $req) = @_; + git_diff_sed_init($req); + $req->{dstate} = DSTATE_STAT; + # this filters for $fh->write or $body->getline (see Qspawn) + sub { + my $dst = delete $req->{dhtml} || ''; + if (defined $_[0]) { # $_[0] == scalar buffer + $req->{dbuf} .= $_[0]; + git_diff_sed_run(\$dst, $req); + } else { # undef means EOF from "git show", flush the last bit + git_diff_sed_close(\$dst, $req); + $dst .= '</pre></body></html>'; + } + $dst; + } +} + +sub call_git_diff { + my ($self, $req) = @_; + my $env = $req->{env}; + my $q = PublicInbox::RepoGitQuery->new($env); + my $id = $q->{id}; + my $id2 = $q->{id2}; + + my $git = $req->{repo_info}->{git}; + my $cmd = $git->cmd(qw(diff-tree -z --numstat -p --encoding=UTF-8 + --no-color -M -B -D -r), $id2, $id, '--'); + my $expath = $req->{expath}; + push @$cmd, $expath if $expath ne ''; + my $o = { nofollow => 1, noindex => 1 }; + my $ex = $expath eq '' ? '' : " $expath"; + $req->{dhtml} = $self->html_start($req, 'diff', $o). "\n\n". + utf8_html("git diff-tree -r -M -B -D ". + "$id2 $id --$ex"). "\n\n"; + $req->{p} = [ $id2 ]; + $req->{h} = $id; + my $rdr = { 2 => $git->err_begin }; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr); + # $env->{'qspawn.quiet'} = 1; + $qsp->psgi_return($env, undef, sub { # parse header + my ($r) = @_; + if (!defined $r) { + [ 500, [ 'Content-Type', 'text/html' ], [ $git->err ]]; + } elsif ($r == 0) { + [ 200, [ 'Content-Type', 'text/html' ], [ + delete($req->{dhtml}). 
+ 'No differences</pre></body></html>' ] + ] + } else { + $env->{'qspawn.filter'} = git_diff_sed($self, $req); + [ 200, [ 'Content-Type', 'text/html' ] ]; + } + }); +} + +1; diff --git a/lib/PublicInbox/RepoGitDiffCommon.pm b/lib/PublicInbox/RepoGitDiffCommon.pm new file mode 100644 index 00000000..0604f9dd --- /dev/null +++ b/lib/PublicInbox/RepoGitDiffCommon.pm @@ -0,0 +1,297 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# common git diff-related code +package PublicInbox::RepoGitDiffCommon; +use strict; +use warnings; +use PublicInbox::RepoGit qw/git_unquote git_commit_title/; +use PublicInbox::Hval qw/utf8_html to_attr/; +use base qw/Exporter/; +our @EXPORT = qw/git_diff_sed_init git_diff_sed_close git_diff_sed_run + DSTATE_INIT DSTATE_STAT DSTATE_LINES/; + +# index abcdef89..01234567 +sub git_diff_ab_index ($$$) { + my ($xa, $xb, $end) = @_; + # not wasting bandwidth on links here, yet + # links in hunk headers are far more useful with line offsets + $end = utf8_html($end); + "index $xa..$xb$end"; +} + +# diff --git a/foo.c b/bar.c +sub git_diff_ab_hdr ($$$) { + my ($req, $fa, $fb) = @_; + my $html_a = utf8_html($fa); + my $html_b = utf8_html($fb); + $fa = git_unquote($fa); + $fb = git_unquote($fb); + $fa =~ s!\Aa/!!; + $fb =~ s!\Ab/!!; + my $anchor = to_attr($fb); + delete $req->{anchors}->{$anchor}; + $fa = $req->{fa} = PublicInbox::Hval->utf8($fa); + $fb = $req->{fb} = PublicInbox::Hval->utf8($fb); + $req->{path_a} = $fa->as_path; + $req->{path_b} = $fb->as_path; + + # not wasting bandwidth on links here + # links in hunk headers are far more useful with line offsets + qq(<a\nid="$anchor">diff</a> --git $html_a $html_b); +} + +# diff (--cc|--combined) +sub git_diff_cc_hdr { + my ($req, $combined, $path) = @_; + my $html_path = utf8_html($path); + $path = git_unquote($path); + my $anchor = to_attr($path); + delete $req->{anchors}->{$anchor}; + my $cc = $req->{cc} = 
PublicInbox::Hval->utf8($path); + $req->{path_cc} = $cc->as_path; + qq(<a\nid="$anchor">diff</a> --$combined $html_path); +} + +# @@ -1,2 +3,4 @@ (regular diff) +sub git_diff_ab_hunk ($$$$) { + my ($req, $ca, $cb, $ctx) = @_; + my ($na) = ($ca =~ /\A-(\d+)/); + my ($nb) = ($cb =~ /\A\+(\d+)/); + + # we add "rel=nofollow" here to reduce load on search engines, here + my $rel = $req->{relcmd}; + my $rv = '@@ '; + if (defined($na) && $na == 0) { # new file + $rv .= $ca; + } else { + $na = defined $na ? "#n$na" : ''; + my $p = $req->{p}->[0]; + $rv .= qq(<a\nrel=nofollow); + $rv .= qq(\nhref="${rel}tree/$req->{path_a}?id=$p$na">); + $rv .= "$ca</a>"; + } + $rv .= ' '; + if (defined($nb) && $nb == 0) { # deleted file + $rv .= $cb; + } else { + my $h = $req->{h}; + $nb = defined $nb ? "#n$nb" : ''; + $rv .= qq(<a\nrel=nofollow); + $rv .= qq(\nhref="${rel}tree/$req->{path_b}?id=$h$nb">); + $rv .= "$cb</a>"; + } + $rv . ' @@' . utf8_html($ctx); +} + +# index abcdef09,01234567..76543210 +sub git_diff_cc_index { + my ($req, $before, $last, $end) = @_; + $end = utf8_html($end); + my @before = split(',', $before); + $req->{pobj_cc} = \@before; + + # not wasting bandwidth on links here, yet + # links in hunk headers are far more useful with line offsets + "index $before..$last$end"; +} + +# @@@ -1,2 -3,4 +5,6 @@@ (combined diff) +sub git_diff_cc_hunk { + my ($req, $at, $offs, $ctx) = @_; + my @offs = split(' ', $offs); + my $last = pop @offs; + my @p = @{$req->{p}}; + my @pobj = @{$req->{pobj_cc}}; + my $path = $req->{path_cc}; + my $rel = $req->{relcmd}; + my $rv = $at; + + # special 'cc' action as we don't have reliable paths from parents + my $ppath = "${rel}cc/$path"; + foreach my $off (@offs) { + my $p = shift @p; + my $obj = shift @pobj; # blob SHA-1 + my ($n) = ($off =~ /\A-(\d+)/); # line number + + if ($n == 0) { # new file (does this happen with --cc?) 
+ $rv .= " $off"; + } else { + $rv .= " <a\nhref=\"$ppath?id=$p&obj=$obj#n$n\">"; + $rv .= "$off</a>"; + } + } + + # we can use the normal 'tree' endpoint for the result + my ($n) = ($last =~ /\A\+(\d+)/); # line number + if ($n == 0) { # deleted file (does this happen with --cc?) + $rv .= " $last"; + } else { + my $h = $req->{h}; + $rv .= qq( <a\nrel=nofollow); + $rv .= qq(\nhref="${rel}tree/$path?id=$h#n$n">$last</a>); + } + $rv .= " $at" . utf8_html($ctx); +} + +sub git_diffstat_rename ($$$) { + my ($req, $from, $to) = @_; + my $anchor = to_attr(git_unquote($to)); + $req->{anchors}->{$anchor} = $to; + my @from = split('/', $from); + my @to = split('/', $to); + my $orig_to = $to; + my ($base, @base); + while (@to && @from && $to[0] eq $from[0]) { + push @base, shift(@to); + shift @from; + } + + $base = utf8_html(join('/', @base)) if @base; + $from = utf8_html(join('/', @from)); + $to = PublicInbox::Hval->utf8(join('/', @to), $orig_to); + my $tp = $to->as_path; + my $th = $to->as_html; + $to = qq(<a\nhref="#$anchor">$th</a>); + @base ? 
"$base/{$from => $to}" : "$from => $to"; +} + +sub DSTATE_INIT () { 0 } +sub DSTATE_STAT () { 1 } +sub DSTATE_LINES () { 2 } + +sub git_diff_sed_init ($) { + my ($req) = @_; + $req->{dbuf} = ''; + $req->{ndiff} = $req->{nchg} = $req->{nadd} = $req->{ndel} = 0; + $req->{dstate} = DSTATE_INIT; +} + +sub git_diff_sed_stat ($$) { + my ($dst, $req) = @_; + my @stat = split(/\0/, $req->{dbuf}, -1); + my $eos; + my $nchg = \($req->{nchg}); + my $nadd = \($req->{nadd}); + my $ndel = \($req->{ndel}); + if (!$req->{dstat_started}) { + $req->{dstat_started} = 1; + + # merges start with an extra '\0' before the diffstat + # non-merge commits start with an extra '\n', instead + if ($req->{mhelp}) { + if ($stat[0] eq '') { + shift @stat; + } else { + warn +'initial merge diffstat line was not empty'; + } + } else { + # for commits, only (not diff-tree) + $stat[0] =~ s/\A\n//s; + } + } + while (defined(my $l = shift @stat)) { + if ($l eq '') { + $eos = 1 if $stat[0] && $stat[0] =~ /\Ad/; # "diff --" + last; + } elsif ($l =~ /\Adiff /) { + unshift @stat, $l; + $eos = 1; + last; + } + $l =~ /\A(\S+)\t+(\S+)\t+(.*)/ or next; + my ($add, $del, $fn) = ($1, $2, $3); + if ($fn ne '') { # normal modification + # TODO: discard diffs if they are too big + # gigantic changes with many files may still OOM us + my $anchor = to_attr(git_unquote($fn)); + $req->{anchors}->{$anchor} = $fn; + $l = utf8_html($fn); + $l = qq(<a\nhref="#$anchor">$l</a>); + } else { # rename + # incomplete... 
+ if (scalar(@stat) < 2) { + unshift @stat, $l; + last; + } + my $from = shift @stat; + my $to = shift @stat; + $l = git_diffstat_rename($req, $from, $to); + } + + # text changes show numerically, Binary does not + if ($add =~ /\A\d+\z/) { + $$nadd += $add; + $$ndel += $del; + $add = "+$add"; + $del = "-$del"; + } + ++$$nchg; + my $num = sprintf('% 6s/%-6s', $del, $add); + $$dst .= " $num\t$l\n"; + } + + $req->{dbuf} = join("\0", @stat); + return unless $eos; + + $req->{dstate} = DSTATE_LINES; + $$dst .= "\n $$nchg "; + $$dst .= $$nchg == 1 ? 'file changed, ' : 'files changed, '; + $$dst .= $$nadd; + $$dst .= $$nadd == 1 ? ' insertion(+), ' : ' insertions(+), '; + $$dst .= $$ndel; + $$dst .= $$ndel == 1 ? " deletion(-)\n\n" : " deletions(-)\n\n"; +} + +sub git_diff_sed_lines ($$) { + my ($dst, $req) = @_; + + # TODO: discard diffs if they are too big + + my @dlines = split(/\n/, $req->{dbuf}, -1); + $req->{dbuf} = ''; + + if (my $help = delete $req->{mhelp}) { + $$dst .= $help; # CC_MERGE + } + + # don't touch the last line, it may not be terminated + $req->{dbuf} .= pop @dlines; + + my $ndiff = \($req->{ndiff}); + my $cmt = '[a-f0-9]+'; + while (defined(my $l = shift @dlines)) { + if ($l =~ m{\Adiff --git ("?a/.+) ("?b/.+)\z}) { # regular + $$dst .= git_diff_ab_hdr($req, $1, $2) . "\n"; + } elsif ($l =~ m{\Adiff --(cc|combined) (.+)\z}) { + $$dst .= git_diff_cc_hdr($req, $1, $2) . "\n"; + } elsif ($l =~ /\Aindex ($cmt)\.\.($cmt)(.*)\z/o) { # regular + $$dst .= git_diff_ab_index($1, $2, $3) . "\n"; + } elsif ($l =~ /\A@@ (\S+) (\S+) @@(.*)\z/) { # regular + $$dst .= git_diff_ab_hunk($req, $1, $2, $3) . "\n"; + } elsif ($l =~ /\Aindex ($cmt,[^\.]+)\.\.($cmt)(.*)$/o) { #--cc + $$dst .= git_diff_cc_index($req, $1, $2, $3) . "\n"; + } elsif ($l =~ /\A(@@@+) (\S+.*\S+) @@@+(.*)\z/) { # --cc + $$dst .= git_diff_cc_hunk($req, $1, $2, $3) . "\n"; + } else { + $$dst .= utf8_html($l) . 
"\n"; + } + ++$$ndiff; + } +} + +sub git_diff_sed_run ($$) { + my ($dst, $req) = @_; + $req->{dstate} == DSTATE_STAT and git_diff_sed_stat($dst, $req); + $req->{dstate} == DSTATE_LINES and git_diff_sed_lines($dst, $req); + undef; +} + +sub git_diff_sed_close ($$) { + my ($dst, $req) = @_; + $$dst .= utf8_html(delete $req->{dbuf}); + undef; +} + +1; diff --git a/lib/PublicInbox/RepoGitFallback.pm b/lib/PublicInbox/RepoGitFallback.pm new file mode 100644 index 00000000..5ce469be --- /dev/null +++ b/lib/PublicInbox/RepoGitFallback.pm @@ -0,0 +1,21 @@ +# Copyright (C) 2015 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ (https://www.gnu.org/licenses/agpl-3.0.txt) + +# when no endpoints match, fallback to this and serve a static file +# This can serve Smart HTTP in the future. +package PublicInbox::RepoGitFallback; +use strict; +use warnings; +use base qw(PublicInbox::RepoBase); +use PublicInbox::GitHTTPBackend; + +# overrides PublicInbox::RepoBase::call +sub call { + my ($self, undef, $req) = @_; + my $expath = $req->{expath}; + return if index($expath, '..') >= 0; # prevent path traversal + my $git = $req->{repo_info}->{git}; + PublicInbox::GitHTTPBackend::serve($req->{env}, $git, $expath); +} + +1; diff --git a/lib/PublicInbox/RepoGitLog.pm b/lib/PublicInbox/RepoGitLog.pm new file mode 100644 index 00000000..9cfa526e --- /dev/null +++ b/lib/PublicInbox/RepoGitLog.pm @@ -0,0 +1,152 @@ +# Copyright (C) 2015 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# show the log view +package PublicInbox::RepoGitLog; +use strict; +use warnings; +use PublicInbox::Hval qw(utf8_html); +use base qw(PublicInbox::RepoBase); +use PublicInbox::RepoGit qw(git_dec_links git_commit_title); +use PublicInbox::Qspawn; +# cannot rely on --date=format-local:... 
yet, it is too new (September 2015) +use constant STATES => qw(h p D ai an s b); +use constant STATE_BODY => (scalar(STATES) - 1); +my $LOG_FMT = '--pretty=tformat:'. join('%n', map { "%$_" } STATES).'%x00'; + +sub parent_links { + if (@_ == 1) { # typical, single-parent commit + qq( / parent <a\nhref="#p$_[0]">$_[0]</a>); + } elsif (@_ > 0) { # merge commit + ' / parents ' . + join(' ', map { qq(<a\nhref="#p$_">$_</a>) } @_); + } else { + ''; # root commit + } +} + +sub flush_log_hdr ($$$) { + my ($req, $dst, $hdr) = @_; + my $rel = $req->{relcmd}; + my $seen = $req->{seen}; + $$dst .= '<hr /><pre>' if scalar keys %$seen; + my $id = $hdr->{h}; + $seen->{$id} = 1; + $$dst .= qq(<a\nid=p$id\n); + $$dst .= qq(href="${rel}commit?id=$id"><b>); + $$dst .= utf8_html($hdr->{'s'}); # FIXME may still OOM + $$dst .= '</b></a>'; + my $D = $hdr->{D}; # FIXME: thousands of decorations may OOM us + if ($D ne '') { + $$dst .= ' (' . join(', ', git_dec_links($rel, $D)) . ')'; + } + my @p = split(/ /, $hdr->{p}); + push @{$req->{parents}}, @p; + my $plinks = parent_links(@p); + $$dst .= "\n- "; + $$dst .= utf8_html($hdr->{an}); + $$dst .= " @ $hdr->{ai}\n commit $id$plinks\n"; + undef +} + +sub git_log_sed_end ($$) { + my ($req, $dst) = @_; + $$dst .= '<hr /><pre>'; + my $m = ''; + my $np = 0; + my $seen = $req->{seen}; + my $git = $req->{repo_info}->{git}; + my $rel = $req->{relcmd}; + foreach my $p (@{$req->{parents}}) { + next if $seen->{$p}; + $seen->{$p} = ++$np; + my $s = git_commit_title($git, $p); + $m .= qq(\n<a\nid=p$p\nhref="?h=$p">$p</a>\t); + $s = defined($s) ? 
utf8_html($s) : ''; + $m .= qq(<a\nhref="${rel}commit?id=$p">$s</a>); + } + if ($np == 0) { + $$dst .= "No commits follow"; + } elsif ($np > 1) { + $$dst .= "Unseen parent commits to follow (multiple choice):\n"; + } else { + $$dst .= "Next parent to follow:\n"; + } + $$dst .= $m; + $$dst .= '</pre></body></html>'; +} + +sub git_log_sed ($$) { + my ($self, $req) = @_; + my $buf = ''; + my $state = 0; + $req->{seen} = {}; + $req->{parents} = []; + my $hdr = {}; + sub { + my $dst; + # $_[0] == scalar buffer, undef means EOF from "git log" + $dst = delete $req->{lhtml} || ''; + my @tmp; + if (defined $_[0]) { + $buf .= $_[0]; + @tmp = split(/\n/, $buf, -1); + $buf = @tmp ? pop(@tmp) : ''; + } else { + @tmp = split(/\n/, $buf, -1); + $buf = undef; + } + + foreach my $l (@tmp) { + if ($state != STATE_BODY) { + $hdr->{((STATES)[$state])} = $l; + if (++$state == STATE_BODY) { + flush_log_hdr($req, \$dst, $hdr); + $hdr = {}; + } + next; + } + if ($l eq "\0") { + $dst .= qq(</pre>); + $state = 0; + } else { + $dst .= "\n"; + $dst .= utf8_html($l); + } + } + git_log_sed_end($req, \$dst) unless defined $buf; + $dst; + }; +} + +sub call_git_log { + my ($self, $req) = @_; + my $repo_info = $req->{repo_info}; + my $max = $repo_info->{max_commit_count} || 50; + $max = int($max); + $max = 50 if $max == 0; + my $env = $req->{env}; + my $q = $req->{'q'} = PublicInbox::RepoGitQuery->new($env); + my $h = $q->{h}; + $h eq '' and $h = 'HEAD'; + my $git = $repo_info->{git}; + my $cmd = $git->cmd(qw(log --no-notes --no-color --abbrev-commit), + $git->abbrev, $LOG_FMT, "-$max", $h, '--'); + my $rdr = { 2 => $git->err_begin }; + my $title = "log: $repo_info->{repo} (" . utf8_html($h). ')'; + $req->{lhtml} = $self->html_start($req, $title) . 
"\n\n"; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr); + $qsp->psgi_return($env, undef, sub { + my ($r) = @_; + if (!defined $r) { + [ 500, [ 'Content-Type', 'text/html' ], [ $git->err ] ]; + } elsif ($r == 0) { + [ 404, [ 'Content-Type', 'text/html' ], [ $git->err ] ]; + } else { + $env->{'qspawn.filter'} = git_log_sed($self, $req); + [ 200, [ 'Content-Type', 'text/html' ] ]; + } + }); +} + +1; diff --git a/lib/PublicInbox/RepoGitPatch.pm b/lib/PublicInbox/RepoGitPatch.pm new file mode 100644 index 00000000..e9227b6f --- /dev/null +++ b/lib/PublicInbox/RepoGitPatch.pm @@ -0,0 +1,39 @@ +# Copyright (C) 2015 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# shows the /patch/ endpoint for git repositories +# usage: /repo.git/patch?id=COMMIT_ID +package PublicInbox::RepoGitPatch; +use strict; +use warnings; +use base qw(PublicInbox::RepoBase); +use PublicInbox::Qspawn; + +# try to be educational and show the command-line used in the signature +my @CMD = qw(format-patch -M --stdout); +my $sig = '--signature=git '.join(' ', @CMD); + +sub call_git_patch { + my ($self, $req) = @_; + my $git = $req->{repo_info}->{git}; + my $env = $req->{env}; + my $q = PublicInbox::RepoGitQuery->new($env); + my $id = $q->{id}; + $id =~ /\A[\w-]+([~\^][~\^\d])*\z/ or $id = 'HEAD'; + + # limit scope, don't take extra args to avoid wasting server + # resources buffering: + my $range = "$id~1..$id^0"; + my $cmd = $git->cmd(@CMD, $sig." $range", $range, '--'); + my $expath = $req->{expath}; + push @$cmd, $expath if $expath ne ''; + + my $qsp = PublicInbox::Qspawn->new($cmd); + $qsp->psgi_return($env, undef, sub { + my ($r) = @_; + my $h = ['Content-Type', 'text/plain; charset=UTF-8']; + $r ? 
[ 200, $h ] : [ 500, $h, [ "format-patch error\n" ] ]; + }); +} + +1; diff --git a/lib/PublicInbox/RepoGitPlain.pm b/lib/PublicInbox/RepoGitPlain.pm new file mode 100644 index 00000000..2ba24e08 --- /dev/null +++ b/lib/PublicInbox/RepoGitPlain.pm @@ -0,0 +1,100 @@ +# Copyright (C) 2015-2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +package PublicInbox::RepoGitPlain; +use strict; +use warnings; +use base qw(PublicInbox::RepoBase); +use PublicInbox::RepoGitBlob; +use PublicInbox::Hval qw(utf8_html); +use PublicInbox::Qspawn; + +sub call_git_plain { + my ($self, $req) = @_; + my $git = $req->{repo_info}->{git}; + my $q = PublicInbox::RepoGitQuery->new($req->{env}); + my $id = $q->{id}; + $id eq '' and $id = 'HEAD'; + $id .= ":$req->{expath}"; + my ($cat, $hex, $type, $size) = $git->cat_file_begin($id); + return unless defined $cat; + + my ($r, $buf); + my $left = $size; + if ($type eq 'blob') { + $type = git_blob_mime_type($self, $req, $cat, \$buf, \$left); + } elsif ($type eq 'commit' || $type eq 'tag') { + $type = 'text/plain'; + } elsif ($type eq 'tree') { + $git->cat_file_finish($left); + return git_tree_plain($req, $git, $hex); + } else { + $type = 'application/octet-stream'; + } + git_blob_stream_response($git, $cat, $size, $type, $buf, $left); +} + +sub git_tree_sed ($) { + my ($req) = @_; + my $buf = ''; + my $end; + my $pfx = $req->{tpfx}; + sub { # $_[0] = buffer or undef + my $dst = delete $req->{tstart} || ''; + my @files; + if (defined $_[0]) { + @files = split(/\0/, $buf .= $_[0], -1); # -1: keep trailing empty field so a chunk ending in NUL leaves no partial name + $buf = @files ? pop(@files) : ''; # unterminated tail (or '') carries over to the next chunk + } else { + @files = split(/\0/, $buf); + $end = '</ul></body></html>'; + } + foreach my $n (@files) { + $n = PublicInbox::Hval->utf8($n); + my $ref = $n->as_path; + $dst .= qq(<li><a\nhref="$pfx$ref">); + $dst .= $n->as_html; + $dst .= '</a></li>'; + } + $end ? 
$dst .= $end : $dst; + } +} + +# This should follow the cgit DOM structure in case anybody depends on it, +# not using <pre> here as we don't expect people to actually view it much +sub git_tree_plain { + my ($req, $git, $hex) = @_; + + my @ex = @{$req->{extra}}; + my $rel = $req->{relcmd}; + my $title = utf8_html(join('/', '', @ex, '')); + my $tslash = $req->{tslash}; + my $pfx = $tslash ? './' : 'plain/'; + my $t = "<h2>$title</h2><ul>"; + if (@ex) { + if ($tslash) { + $t .= qq(<li><a\nhref="../">../</a></li>); + } else { + $t .= qq(<li><a\nhref="./">../</a></li>); + my $last = PublicInbox::Hval->utf8($ex[-1])->as_href; + $pfx = "$last/"; + } + } + + $req->{tpfx} = $pfx; + $req->{tstart} = "<html><head><title>$title</title></head><body>".$t; + my $cmd = $git->cmd(qw(ls-tree --name-only -z), $git->abbrev, $hex); + my $rdr = { 2 => $git->err_begin }; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr); + my $env = $req->{env}; + $qsp->psgi_return($env, undef, sub { + my ($r) = @_; + if (!defined $r) { + [ 500, [ 'Content-Type', 'text/plain' ], [ $git->err ]]; + } else { + $env->{'qspawn.filter'} = git_tree_sed($req); + [ 200, [ 'Content-Type', 'text/html' ] ]; + } + }); +} + +1; diff --git a/lib/PublicInbox/RepoGitQuery.pm b/lib/PublicInbox/RepoGitQuery.pm new file mode 100644 index 00000000..638a1316 --- /dev/null +++ b/lib/PublicInbox/RepoGitQuery.pm @@ -0,0 +1,50 @@ +# Copyright (C) 2015 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# query parameter management for repobrowse +package PublicInbox::RepoGitQuery; +use strict; +use warnings; +use PublicInbox::Hval; +use URI::Escape qw(uri_unescape); +my @KNOWN_PARAMS = qw(id id2 h ofs); + +sub new { + my ($class, $env) = @_; + # we don't care about multi-value + my %tmp = map { + my ($k, $v) = split('=', uri_unescape($_), 2); + $v = '' unless defined $v; + $v =~ tr/+/ /; + ($k, $v) + } split(/[&;]/, $env->{QUERY_STRING}); + + my $self = {}; + 
foreach (@KNOWN_PARAMS) { + my $v = $tmp{$_}; + $self->{$_} = defined $v ? $v : ''; + } + bless $self, $class; +} + +sub qs { + my ($self, %over) = @_; + + if (keys %over) { + my $tmp = bless { %$self }, ref($self); + foreach my $k (keys %over) { $tmp->{$k} = $over{$k}; } + $self = $tmp; + } + + my @qs; + foreach my $k (@KNOWN_PARAMS) { + my $v = $self->{$k}; + + next if ($v eq ''); + $v = PublicInbox::Hval->new($v)->as_href; + push @qs, "$k=$v"; + } + scalar(@qs) ? ('?' . join('&', @qs)) : ''; +} + +1; diff --git a/lib/PublicInbox/RepoGitSearch.pm b/lib/PublicInbox/RepoGitSearch.pm new file mode 100644 index 00000000..36e3fab3 --- /dev/null +++ b/lib/PublicInbox/RepoGitSearch.pm @@ -0,0 +1,179 @@ +# Copyright (C) 2017 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# Read-only search interface for use by the Repobrowse web interface +# RepoGitSearchIdx builds upon this for writing a Xapian DB. +package PublicInbox::RepoGitSearch; +use strict; +use warnings; +use Search::Xapian qw/:standard/; + +# values for ranges and sorting +use constant { + CD => 0, # commit date stamp (YYYYMMDD) + AD => 1, # author date stamp (YYYYMMDD) + + REPO_SCHEMA_VERSION => 1, + # n.b. 
FLAG_PURE_NOT is expensive not suitable for a public website + # as it could become a denial-of-service vector + QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD, +}; +our $LANG = 'english'; + +my %bool_pfx_internal = ( + type => 'T', # "commit", "tag", or "ref" +); + +my %bool_pfx_external = (); + +my %prob_prefix = ( + id => 'Q', # git object ID, partial matches supported + p => 'XP', # parent commit (partial) + s => 'S', # subject + a => 'A', # Author name + email + c => 'XC', # Committer name + email + ac => 'A XC', # Author and Committer name + email + b => 'XBODY', # commit message body + bs => 'S XBODY', # commit message (subject + body) + diff_fn => 'XDFN', # changed filenames + diff_hdr => 'XDHH', # diff hunk header + diff_ctx => 'XDCTX', # diff context + diff_a => 'XDFA', # diff a/ file (before) + diff_b => 'XDFB', # diff b/ file (after) + diff => 'XDFN XDHH XDCTX XDFA XDFB', # entire diff + preimg => 'XPRE', # blob pre-image (full) + postimg => 'XPOST', # blob post-image (full) + # default: + '' => 'Q XP S A XC XBODY XDFN XDHH XDCTX XDFA XDFB XPRE XPOST', +); + +our @HELP = ( + 's:' => 'match within message subject e.g. s:"a quick brown fox"', + 'ad:' => <<EOF, +Author date range as YYYYMMDD e.g. ad:19931002..20101002 +Open-ended ranges such as ad:19931002.. 
and ad:..20101002 +are also supported +EOF + 'cd:' => 'Committer date range as YYYYMMDD, see ad: above', + 'b:' => 'match within commit message body', + 'bs:' => 'match within the commit message subject and body', +); +chomp @HELP; + +sub new { + my ($class, $git_dir, $repo_dir) = @_; + $repo_dir ||= "$git_dir/public-inbox"; + my $xdir = "$repo_dir/xr".REPO_SCHEMA_VERSION; + bless { git_dir => $git_dir, xdir => $xdir }, $class; +} + +# overriden by RepoGitSearchIdx +sub xdb ($) { $_[0]->{xdb} ||= Search::Xapian::Database->new($_[0]->{xdir}) } + +sub retry_reopen ($$) { + my ($self, $cb) = @_; + my $ret; + for (1..3) { + eval { $ret = $cb->() }; + return $ret unless $@; + # Exception: The revision being read has been discarded - + # you should call Xapian::Database::reopen() + if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') { + $self->{xdb}->reopen; + } else { + die; + } + } +} + +sub _enquire_once ($$$) { + my ($self, $query, $opts) = @_; + my $enq = $self->{enquire} ||= Search::Xapian::Enquire->new($self->xdb); + $enq->set_query($query); + $opts ||= {}; + my $desc = !$opts->{asc}; + if ($opts->{relevance}) { + $enq->set_sort_by_relevance_then_value(AD, $desc); + } else { + $enq->set_sort_by_value_then_relevance(AD, $desc); + } + my $offset = $opts->{offset} || 0; + my $limit = $opts->{limit} || 50; + $enq->get_mset($offset, $limit); +} + +sub _do_enquire ($$$) { + my ($self, $query, $opts) = @_; + retry_reopen($self, sub { _enquire_once($self, $query, $opts) }); +} + +sub stemmer () { Search::Xapian::Stem->new($LANG) } + +# read-only +sub qp ($) { + my ($self) = @_; + + my $qp = $self->{query_parser}; + return $qp if $qp; + + # new parser + $qp = Search::Xapian::QueryParser->new; + $qp->set_default_op(OP_AND); + $qp->set_database($self->xdb); + $qp->set_stemmer(stemmer()); + $qp->set_stemming_strategy(STEM_SOME); + + $qp->add_valuerangeprocessor( + Search::Xapian::NumberValueRangeProcessor->new(AD, 'ad:')); + $qp->add_valuerangeprocessor( + 
Search::Xapian::NumberValueRangeProcessor->new(CD, 'cd:')); + + while (my ($name, $prefix) = each %bool_pfx_external) { + $qp->add_boolean_prefix($name, $prefix); + } + + while (my ($name, $prefix) = each %prob_prefix) { + $qp->add_prefix($name, $_) foreach split(/ /, $prefix); + } + + $self->{query_parser} = $qp; +} + +# returns begin and end PostingIterator +sub find_docids ($$) { + my ($self, $termval) = @_; + my $db = $self->xdb; + ($db->postlist_begin($termval), $db->postlist_end($termval)); +} + +sub find_unique_docid ($$$) { + my ($self, $termval) = @_; + my ($begin, $end) = find_docids($self, $termval); + return undef if $begin->equal($end); # not found + my $rv = $begin->get_docid; + # sanity check + $begin->inc; + $begin->equal($end) or die "Term '$termval' is not unique\n"; + $rv; +} + +sub help ($) { + my ($self) = @_; + \@HELP; +} + +# read-only +sub query { + my ($self, $query_string, $opts) = @_; + my $query; + + $opts ||= {}; + unless ($query_string eq '') { + $query = qp($self)->parse_query($query_string, QP_FLAGS); + $opts->{relevance} = 1 unless exists $opts->{relevance}; + } + + _do_enquire($self, $query, $opts); +} + +1; diff --git a/lib/PublicInbox/RepoGitSearchIdx.pm b/lib/PublicInbox/RepoGitSearchIdx.pm new file mode 100644 index 00000000..d2b4597e --- /dev/null +++ b/lib/PublicInbox/RepoGitSearchIdx.pm @@ -0,0 +1,387 @@ +# Copyright (C) 2017 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# Qrefs/(tags|heads)/foo => 40-byte SHA1 hex of commit +# Q$SHA1HEX_OF_COMMIT +# +# Indexes any git repository with Xapian; intended for code; +# see PublicInbox::SearchIdx for a mail-specific indexer +package PublicInbox::RepoGitSearchIdx; +use strict; +use warnings; +use base qw(PublicInbox::RepoGitSearch); # base is read-only +use POSIX qw(strftime); +use PublicInbox::Git; +use PublicInbox::GitIdx; +use constant { + Z40 => ('0' x 40), + STATE_GPGSIG => -0x80000000, + DEBUG => !!$ENV{DEBUG}, 
+ BATCH_BYTES => 1_000_000, +}; + +sub new { + my ($class, $git_dir, $repo_dir) = @_; + require Search::Xapian::WritableDatabase; + my $self = $class->SUPER::new($git_dir, $repo_dir); + my $git = $self->{git} = PublicInbox::Git->new($git_dir); + $self->{want_refs_re} = qr!^refs/(?:heads|tags)/!; + $self->{'umask'} = git_umask_for($git); + $self; +} + +sub xdb ($) { + my ($self) = @_; + $self->{xdb} ||= with_umask($self->{'umask'}, sub { + my $xdir = $self->{xdir}; + unless (-d $xdir) { + require File::Path; + File::Path::mkpath($xdir); + } + Search::Xapian::WritableDatabase->new($xdir, + Search::Xapian::DB_CREATE_OR_OPEN); + }); +} + +sub doc_new ($$) { + my ($type, $unique_id) = @_; + my $doc = Search::Xapian::Document->new; + $doc->add_term('T'.$type); + $doc->add_term($unique_id); + $doc; +} + +sub add_val ($$$) { + my ($doc, $col, $num) = @_; + $num = Search::Xapian::sortable_serialise($num); + $doc->add_value($col, $num); +} + +sub each_term_val ($$$$) { + my ($doc, $pfx, $re, $cb) = @_; + my $end = $doc->termlist_end; + my $i = $doc->termlist_begin; + $i->skip_to($pfx); + while ($i != $end) { + my $val = $i->get_termname; + $val =~ s/$re// and $cb->($val); + $i->inc; + } + undef; +} + +sub get_doc ($$$$) { + my ($self, $id_ref, $type, $oid) = @_; + my $doc; + my $doc_id = $self->find_unique_docid('Q'.$oid); + if (defined $doc_id) { + $doc = $self->{xdb}->get_document($doc_id); + } else { + $doc = doc_new($type, 'Q'.$oid); + } + $$id_ref = $doc_id; + $doc; +} + +# increments and returns update generation counter +sub update_id ($) { + my ($self) = @_; + my $db = $self->{xdb}; + my $update_id = int($db->get_metadata('last_update_id') || 0); + $db->set_metadata('last_update_id', ++$update_id); + $update_id; +} + +sub replace_or_add ($$$) { + my ($db, $doc_id, $doc) = @_; + # update our ref: + if (defined $doc_id) { + $db->replace_document($doc_id, $doc); + } else { + $doc_id = $db->add_document($doc); + } + $doc_id; +} + +sub decor_update { + my ($self, $doc, 
$decor, $oid) = @_; + + # load all current refs + my $want = $self->{want_refs_re}; + ($decor) = ($decor =~ m!\((.+)\)!); + foreach (split(/, /, $decor)) { + my ($sym, $refname, $tag); + if (/^(\S+) -> (\S+)\z/) { + ($sym, $refname) = ($1, $2); + } elsif (s/^tag: //) { + $refname = $_; + $tag = 1; # XXX use this + } else { + $refname = $_; + } + if ($refname =~ $want) { + $self->{-active_refs}->{$refname} = $oid; + } + # TODO: handle $sym, and do something with tags + } +} + +sub term_generator ($) { # write-only + my ($self) = @_; + + $self->{term_generator} ||= eval { + my $tg = Search::Xapian::TermGenerator->new; + $tg->set_stemmer($self->stemmer); + $tg; + }; +} + +sub index_text_inc ($$$) { + my ($tg, $text, $pfx) = @_; + $tg->index_text($text, 1, $pfx); + $tg->increase_termpos; +} + +sub index_blob_id ($$$) { + my ($tg, $blob_id, $pfx) = @_; + index_text_inc($tg, $blob_id, $pfx) if $blob_id ne Z40; +} + +sub each_log_line ($$) { + my ($self, $range) = @_; + my $log = $self->{git}->popen(qw(log --decorate=full --pretty=raw + --no-color --no-abbrev --no-notes + -r --raw -p + ), $range, '--'); + my $db = $self->{xdb}; + my ($doc, $doc_id); + my $tg = term_generator($self); + my $state = 0; # 1: subject, 2: body, 3: diff, 4: diff -c + my $tip; + my $hex = '[a-f0-9]+'; + my ($cc_ins, $cc_del); + my $batch = BATCH_BYTES; + + local $/ = "\n"; + while (defined(my $l = <$log>)) { + $batch -= bytes::length($l); + # prevent memory growth from Xapian + if ($batch <= 0) { + $db->flush; + $batch = BATCH_BYTES; + } + if ($l =~ /^commit (\S+)(\s+\([^\)]+\))?/) { + my ($oid, $decor) = ($1, $2); + replace_or_add($db, $doc_id, $doc) if $doc; + $tip ||= $oid; + $state = 0; + $cc_ins = $cc_del = undef; + + $doc = get_doc($self, \$doc_id, 'commit', $oid); + decor_update($self, $doc, $decor, $oid) if $decor; + # old commit + last if defined $doc_id; + + # new commit: + $tg->set_document($doc); + $doc->set_data($oid); + $doc->add_term('Q' . 
$oid); + index_text_inc($tg, $oid, 'Q'); + } elsif ($l =~ /^parent (\S+)/) { + my $parent = $1; + index_text_inc($tg, $parent, 'XP'); + } elsif ($l =~ /^author ([^<]*?<[^>]+>) (\d+)/) { + my ($au, $at) = ($1, $2); + index_text_inc($tg, $au, 'A'); + add_val($doc, PublicInbox::RepoGitSearch::AD, + strftime('%Y%m%d', gmtime($at))); + } elsif ($l =~ /^committer ([^<]*?<[^>]+>) (\d+)/) { + my ($cu, $ct) = ($1, $2); + index_text_inc($tg, $cu, 'XC'); + add_val($doc, PublicInbox::RepoGitSearch::CD, + strftime('%Y%m%d', gmtime($ct))); + } elsif ($l =~ /^gpgsig /) { + $state = STATE_GPGSIG; + } elsif ($l =~ /^mergetag /) { + $state = -1; + } elsif ($state < 0) { # inside mergetag or gpgsig + if ($l eq " \n") { # paragraph + $state--; + $tg->increase_termpos; + } elsif ($l eq "-----BEGIN PGP SIGNATURE-----\n") { + # no point in indexing a PGP signature + $state = STATE_GPGSIG; + } elsif ($state == -2) { # mergetag subject + $tg->index_text($l, 1); + $tg->increase_termpos; + } elsif ($state < -2 && $state > STATE_GPGSIG) { + $tg->index_text($l); # mergetag body + } elsif ($l eq "\n") { + # end of mergetag, onto normal commit message + $tg->increase_termpos; + $state = 0; + } elsif ($l =~ /^ (?:tag|tagger|type) /) { + # ignored + } elsif (DEBUG) { + if ($state <= STATE_GPGSIG) { + # skip + } else { + warn "unhandled mergetag: $l"; + } + } + } elsif ($state < 3 && $l =~ s/^ //) { # subject and body + if ($state > 0) { + $l =~ /\S/ ? $tg->index_text($l, 1) + : $tg->increase_termpos; + $state = 2; + } else { + $state = 1; + $tg->index_text($l, 1, 'S') if $l ne "\n"; + } + } elsif ($l =~ /^:\d{6} \d{6} ($hex) ($hex) (\S+)\t+(.+)/o) { + # --raw output (regular) + my ($pre, $post, $chg, $names) = ($1, $2, $3, $4); + index_blob_id($tg, $pre, 'XPRE'); + index_blob_id($tg, $post, 'XPOST'); + } elsif ($l =~ /^(::+)(?:\d{6} )+ ($hex .+)? 
(\S+)\t+(.+)/o) { + # --raw output (combined) + my ($colons, $blobs, $chg, $names) = ($1, $2, $3, $4); + my @blobs = split(/ /, $blobs); + my $post = pop @blobs; + my $n = length($colons); + if (scalar(@blobs) != $n) { + die "combined raw parsed wrong:\n$l\n//\n"; + } + index_blob_id($tg, $_, 'XPRE') foreach @blobs; + index_blob_id($tg, $post, 'XPOST'); + unless ($cc_ins) { + $n--; + $cc_ins = qr/^ {0,$n}[\+]\s*(.*)/; + $cc_del = qr/^ {0,$n}[\-]\s*(.*)/; + } + } elsif ($l =~ m!^diff --git (?:"?a/.+?) (?:"?b/.+)!) { + # regular diff, filenames handled by --raw + $state = 3; + } elsif ($l =~ /^diff --(?:cc|combined) (?:.+)/) { + # combined diff, filenames handled by --raw + $state = 4; + } elsif ($l =~ /^@@ (?:\S+) (?:\S+) @@(.*)/) { + my $hunk_hdr = $1; + # regular hunk header context + $hunk_hdr =~ /\S/ and + index_text_inc($tg, $hunk_hdr, 'XDHH'); + # not currently handled: + } elsif ($l =~ /^index (?:$hex)\.\.(?:$hex)/o) { + } elsif ($l =~ /^index (?:$hex,[^\.]+)\.\.(?:$hex)(.*)$/o) { + #--cc + } elsif ($l =~ /^(?:@@@+) (?:\S+.*\S+) @@@+\z/) { # --cc + } elsif ($l =~ /^(?:old|new) mode/) { + } elsif ($l =~ /^(?:deleted|new) file mode/) { + } elsif ($l =~ /^tree (?:\S+)/) { + } elsif ($l =~ /^(?:copy|rename) (?:from|to) /) { + } elsif ($l =~ /^(?:dis)?similarity index /) { + } elsif ($l =~ /^\\ No newline at end of file/) { + } elsif ($l =~ /^Binary files .* differ/) { + } elsif ($l =~ /^--- /) { # preimage filename + } elsif ($l =~ /^\+\+\+ /) { # postimage filename + } elsif ($state == 3) { # diff --git + if ($l =~ s/^\+//) { + index_text_inc($tg, $l, 'XDFB'); + } elsif ($l =~ s/^\-//) { + index_text_inc($tg, $l, 'XDFA'); + } elsif ($l =~ s/^ //) { + index_text_inc($tg, $l, 'XDCTX'); + } elsif (DEBUG) { + if ($l eq "\n") { + } else { + warn "unhandled diff -u $l"; + } + } + } elsif ($state == 4) { # diff --cc/combined + if ($l =~ $cc_ins) { + index_text_inc($tg, $1, 'XDFB'); + } elsif ($l =~ $cc_del) { + index_text_inc($tg, $1, 'XDFA'); + } elsif ($l =~ s/^ //) 
{ + index_text_inc($tg, $l, 'XDCTX'); + } elsif (DEBUG) { + if ($l eq "\n") { + } else { + warn "unhandled diff --cc $l"; + } + } + } elsif (DEBUG) { + warn "wtf $state $l\n" if $l ne "\n"; + } + } + replace_or_add($db, $doc_id, $doc) if $doc; + $tip; +} + +sub index_top_ref ($$$) { + my ($self, $refname, $end) = @_; + my $doc_id; + my $db = xdb($self); + my $ref_doc = get_doc($self, \$doc_id, 'ref', $refname); + my $begin = defined $doc_id ? $ref_doc->get_data : ''; + my $active = $self->{-active_refs} = { $refname => undef }; + my $git = $self->{git}; + + # check for discontiguous branches (from "push --force") + if ($begin ne '') { + my $base = $git->qx(qw(merge-base), $begin, $end); + chomp $base; + if ($base ne $begin) { + warn "$refname updated with force\n"; + # TODO: cleanup_forced_update($self, $refname); + $begin = ''; + } + } + my $range = $begin eq '' ? $end : "$begin^0..$end^0"; + my $tip = each_log_line($self, $range); + my $progress = $self->{progress}; + if (defined $tip) { + $ref_doc->set_data($tip); + print $progress "$refname => $tip\n" if $progress; + replace_or_add($db, $doc_id, $ref_doc); + } + $db->flush; + + # update all decorated refs which got snowballed into this one + delete $active->{$refname}; + my $n = 100; + foreach my $ref (keys %$active) { + if (--$n <= 0) { + $db->flush; + $n = 100; + } + $ref_doc = get_doc($self, \$doc_id, 'ref', $ref); + $ref_doc->set_data($active->{$ref}); + if ($progress) { + print $progress "$ref => $active->{$ref} ($refname)\n"; + } + replace_or_add($db, $doc_id, $ref_doc); + } + $db->flush; +} + +# main entry sub: +sub index_sync { + my ($self, $opts) = @_; + $self->{progress} = $opts->{progress}; + my $db = xdb($self); + $self->{-update_id} = update_id($self); + # go for most recent refs, first, since that reduces the amount + # of work we have to do. 
+ my $refs = $self->{git}->popen(qw(for-each-ref --sort=-creatordate)); + local $/ = "\n"; + while (defined(my $line = <$refs>)) { + chomp $line; + my ($oid, $type, $refname) = split(/\s+/, $line); + next unless $refname =~ $self->{want_refs_re}; + next unless $type eq 'commit' || $type eq 'tag'; + index_top_ref($self, $refname, $oid); + } +} + +1; diff --git a/lib/PublicInbox/RepoGitSnapshot.pm b/lib/PublicInbox/RepoGitSnapshot.pm new file mode 100644 index 00000000..e05ad80c --- /dev/null +++ b/lib/PublicInbox/RepoGitSnapshot.pm @@ -0,0 +1,110 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# shows the /snapshot/ endpoint for git repositories +# Mainly for compatibility reasons with cgit, I'm unsure if +# showing this in a repository viewer is a good idea. + +package PublicInbox::RepoGitSnapshot; +use strict; +use warnings; +use base qw(PublicInbox::RepoBase); +use PublicInbox::Git; +use PublicInbox::Qspawn; +our $SUFFIX; +BEGIN { + # as described in git-archive(1), users may add support for + # other compression schemes such as xz or bz2 via git-config(1): + # git config tar.tar.xz.command "xz -c" + # git config tar.tar.bz2.command "bzip2 -c" + chomp(my @l = `git archive --list`); + $SUFFIX = join('|', map { quotemeta $_ } @l); +} + +# Not using standard mime types since the compressed tarballs are +# special or do not match my /etc/mime.types. Choose what gitweb +# and cgit agree on for compatibility. 
+our %FMT_TYPES = ( + 'tar' => 'application/x-tar', + 'tar.bz2' => 'application/x-bzip2', + 'tar.gz' => 'application/x-gzip', + 'tar.xz' => 'application/x-xz', + 'tgz' => 'application/x-gzip', + 'zip' => 'application/x-zip', +); + +sub call_git_snapshot ($$) { # invoked by PublicInbox::RepoBase::call + my ($self, $req) = @_; + + my @extra = @{$req->{extra}}; + my $ref = shift @extra; + return $self->r(404) if (!defined $ref) || scalar(@extra); + my $orig_fn = $ref; + + # just in case git changes refname rules, don't allow wonky filenames + # to break the Content-Disposition header, either. + return $self->r(404) if $orig_fn =~ /["\s]/s; + return $self->r(404) unless ($ref =~ s/\.($SUFFIX)\z//o); + my $fmt = $1; + my $env = $req->{env}; + my $repo_info = $req->{repo_info}; + + # support disabling certain snapshots types entirely to twart + # URL guessing since it could burn server resources. + return $self->r(404) if $repo_info->{snapshots_disabled}->{$fmt}; + + # strip optional basename (may not exist) + $ref =~ s/$repo_info->{snapshot_re}//; + + # don't allow option/command injection, git refs do not start with '-' + return $self->r(404) if $ref =~ /\A-/; + + my $git = $repo_info->{git}; + my $tree = ''; + my $last_cb = sub { + delete $env->{'repobrowse.tree_cb'}; + delete $env->{'qspawn.quiet'}; + my $pfx = "$repo_info->{snapshot_pfx}-$ref/"; + my $cmd = $git->cmd('archive', + "--prefix=$pfx", "--format=$fmt", $tree); + my $rdr = { 2 => $git->err_begin }; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr); + $qsp->psgi_return($env, undef, sub { + my $r = $_[0]; + return $self->r(500) unless $r; + [ 200, [ 'Content-Type', + $FMT_TYPES{$fmt} || 'application/octet-stream', + 'Content-Disposition', + qq(inline; filename="$orig_fn"), + 'ETag', qq("$tree") ] ]; + }); + }; + + my $cmd = $git->cmd(qw(rev-parse --verify --revs-only)); + # try prefixing "v" or "V" for tag names to get the tree + my @refs = ("V$ref", "v$ref", $ref); + $env->{'qspawn.quiet'} = 1; + my 
$tree_cb = $env->{'repobrowse.tree_cb'} = sub { + my ($ref) = @_; + if (defined $ref) { + $tree = $$ref; + chomp $tree; + } + return $last_cb->() if $tree ne ''; + unless (scalar(@refs)) { + my $res = delete $env->{'qspawn.response'}; + return $res->($self->r(404)); + } + my $rdr = { 2 => $git->err_begin }; + my $r = pop @refs; + my $qsp = PublicInbox::Qspawn->new([@$cmd, $r], undef, $rdr); + $qsp->psgi_qx($env, undef, $env->{'repobrowse.tree_cb'}); + }; + sub { + $env->{'qspawn.response'} = $_[0]; + # kick off the "loop" foreach @refs + $tree_cb->(undef); + } +} + +1; diff --git a/lib/PublicInbox/RepoGitSummary.pm b/lib/PublicInbox/RepoGitSummary.pm new file mode 100644 index 00000000..e9e1458b --- /dev/null +++ b/lib/PublicInbox/RepoGitSummary.pm @@ -0,0 +1,109 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# The main summary/landing page of a git repository viewer +package PublicInbox::RepoGitSummary; +use strict; +use warnings; +use PublicInbox::Hval qw(utf8_html); +use base qw(PublicInbox::RepoBase); +use PublicInbox::Qspawn; + +sub call_git_summary { + my ($self, $req) = @_; + my $git = $req->{repo_info}->{git}; + my $env = $req->{env}; + + # n.b. we would use %(HEAD) in for-each-ref --format if we could + # rely on git 1.9.0+, but it's too soon for that in early 2017... + my $cmd = $git->cmd(qw(symbolic-ref HEAD)); + my $rdr = { 2 => $git->err_begin }; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr); + sub { + my ($res) = @_; # Plack streaming callback + $qsp->psgi_qx($env, undef, sub { + chomp(my $head_ref = ${$_[0]}); + for_each_ref($self, $req, $res, $head_ref); + }); + } +} + +use constant EACH_REF_FMT => '--format=' . 
+ join(' ', map { "%($_)" } + qw(refname objecttype objectname creatordate:short subject)); + +sub for_each_ref { + my ($self, $req, $res, $head_ref) = @_; + my $count = 10; # TODO: configurable + my $fh; + my $repo_info = $req->{repo_info}; + my $git = $repo_info->{git}; + my $refs = $git->popen(qw(for-each-ref --sort=-creatordate), + EACH_REF_FMT, "--count=$count", + qw(refs/heads/ refs/tags/)); + $fh = $res->([200, ['Content-Type'=>'text/html; charset=UTF-8']]); + # ref names are unpredictable in length and requires tables :< + $fh->write($self->html_start($req, + "$repo_info->{repo}: overview") . + '</pre><table>'); + + my $rel = $req->{relcmd}; + while (<$refs>) { + my ($ref, $type, $hex, $date, $s) = split(' ', $_, 5); + my $x = $ref eq $head_ref ? ' (HEAD)' : ''; + $ref =~ s!\Arefs/(?:heads|tags)/!!; + $ref = PublicInbox::Hval->utf8($ref); + my $h = $ref->as_html; + $ref = $ref->as_href; + my $sref; + if ($type eq 'tag') { + $h = "<b>$h</b>"; + $sref = $ref = $rel . 'tag?h=' . $ref; + } elsif ($type eq 'commit') { + $sref = $rel . 'commit?h=' . $ref; + $ref = $rel . 'log?h=' . $ref; + } else { + # no point in wasting code to support tagged + # trees/blobs... + next; + } + chomp $s; + $fh->write(qq(<tr><td><tt><a\nhref="$ref">$h</a>$x</tt></td>) . + qq(<td><tt>$date <a\nhref="$sref">) . utf8_html($s) . + '</a></tt></td></tr>'); + + } + $fh->write('</table>'); + + # some people will use README.md or even README.sh here... + my $readme = $repo_info->{readme}; + defined $readme or $readme = [ 'README', 'README.md' ]; + $readme = [ $readme ] if (ref($readme) ne 'ARRAY'); + foreach my $r (@$readme) { + my $doc = $git->cat_file('HEAD:'.$r); + defined $doc or next; + $fh->write('<pre>' . readme_path_links($rel, $r) . + " (HEAD)\n\n" . utf8_html($$doc) . 
'</pre>'); + } + $fh->write('</body></html>'); + $fh->close; +} + +sub readme_path_links { + my ($rel, $readme) = @_; + my @path = split(m!/+!, $readme); + + my $s = "tree <a\nhref=\"${rel}tree\">root</a>/"; + my @t; + $s .= join('/', (map { + push @t, $_; + my $e = PublicInbox::Hval->utf8($_, join('/', @t)); + my $ep = $e->as_path; + my $eh = $e->as_html; + $e = "<a\nhref=\"${rel}tree/$ep\">$eh</a>"; + # bold the last one + scalar(@t) == scalar(@path) ? "<b>$e</b>" : $e; + } @path)); +} + +1; diff --git a/lib/PublicInbox/RepoGitTag.pm b/lib/PublicInbox/RepoGitTag.pm new file mode 100644 index 00000000..96835b2c --- /dev/null +++ b/lib/PublicInbox/RepoGitTag.pm @@ -0,0 +1,213 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# shows the /tag/ endpoint for git repositories +package PublicInbox::RepoGitTag; +use strict; +use warnings; +use base qw(PublicInbox::RepoBase); +use POSIX qw(strftime); +use PublicInbox::Hval qw(utf8_html); +use PublicInbox::Qspawn; + +my %cmd_map = ( # type => action + commit => 'commit', + tag => 'tag', + # tree/blob fall back to 'show' +); + +sub call_git_tag { + my ($self, $req) = @_; + + my $q = PublicInbox::RepoGitQuery->new($req->{env}); + my $h = $q->{h}; + $h eq '' and return git_tag_list($self, $req); + sub { + my ($res) = @_; + git_tag_show($self, $req, $h, $res); + } +} + +sub read_err { + my ($fh, $type, $hex) = @_; + + $fh->write("</pre><hr /><pre><b>error reading $type $hex</b>"); +} + +sub git_show_tag_as_tag { + my ($self, $fh, $req, $h, $cat, $left, $type, $hex) = @_; + my $buf = ''; + my $offset = 0; + while ($$left > 0) { + my $r = read($cat, $buf, $$left, $offset); + unless (defined $r) { + read_err($fh, $type, $hex); + last; + } + $offset += $r; + $$left -= $r; + } + my $head; + ($head, $buf) = split(/\r?\n\r?\n/, $buf, 2); + + my %h = map { split(/[ \t]/, $_, 2) } split(/\r?\n/, $head); + my $tag = utf8_html($h{tag}); + $type = 
$h{type} || '(unknown)'; + my $obj = $h{object}; + $h = $self->html_start($req, 'tag: ' . $tag); + my $label = "$type $obj"; + my $cmd = $cmd_map{$type} || 'show'; + my $rel = $req->{relcmd}; + my $obj_link = qq(<a\nhref="$rel$cmd?id=$obj">$label</a>); + $head = $h . "\n\n tag <b>$tag</b>\nobject $obj_link\n"; + if (my $tagger = $h{tagger}) { + $head .= 'tagger ' . join("\t", creator_split($tagger)) . "\n"; + } + $fh->write($head . "\n"); + + # n.b. tag subjects may not have a blank line after them, + # but we bold the first line anyways + my @buf = split(/\r?\n/s, $buf); + if (defined(my $subj = shift @buf)) { + $fh->write('<b>' . utf8_html($subj) . "</b>\n"); + + $fh->write(utf8_html($_) . "\n") foreach @buf; + } +} + +sub git_tag_show { + my ($self, $req, $h, $res) = @_; + my $git = $req->{repo_info}->{git}; + my $fh; + my $hdr = ['Content-Type', 'text/html; charset=UTF-8']; + + # yes, this could still theoretically show anything, + # but a tag could also point to anything: + $git->cat_file("refs/tags/$h", sub { + my ($cat, $left, $type, $hex) = @_; + $fh = $res->([200, $hdr]); + $h = PublicInbox::Hval->utf8($h); + my $m = "git_show_${type}_as_tag"; + + # git_show_tag_as_tag, git_show_commit_as_tag, + # git_show_tree_as_tag, git_show_blob_as_tag + if ($self->can($m)) { + $self->$m($fh, $req, $h, $cat, $left, $type, $hex); + } else { + $self->unknown_tag_type($fh, $req, $h, $type, $hex); + } + }); + unless ($fh) { # callback never opened a response, so the tag was not found + $fh = $res->([404, $hdr]); + $fh->write(invalid_tag_start($self, $req, $h)); # sub unpacks ($self, $req, $h) and calls $self->html_start + } + $fh->write('</pre></body></html>'); + $fh->close; +} + +sub invalid_tag_start { + my ($self, $req, $h) = @_; + my $rel = $req->{relcmd}; + $h = 'missing tag: ' . utf8_html($h); + $self->html_start($req, $h) . "\n\n\t$h\n\n" . 
qq(see <a\nhref="${rel}tag">tag list</a> for valid tags.); +} + +sub git_each_tag_sed ($$) { + my ($self, $req) = @_; + my $repo_info = $req->{repo_info}; + my $buf = ''; + my $nr = 0; + $req->{thtml} = $self->html_start($req, "$repo_info->{repo}: tag list") . + '</pre><table><tr>' . + join('', map { "<th><tt>$_</tt></th>" } qw(tag date subject)). + '</tr>'; + sub { + my $dst = delete $req->{thtml} || ''; + my $end = ''; + my @lines; + if (defined $_[0]) { + @lines = split(/\n/, $buf .= $_[0], -1); # -1: keep trailing empty field so a chunk ending in "\n" leaves no partial row + $buf = @lines ? pop(@lines) : ''; # unterminated tail (or '') carries over to the next chunk + } else { # for-each-ref EOF + @lines = split(/\n/, $buf); + $buf = undef; + if ($nr == $req->{-tag_count}) { + $end = "<pre>Showing the latest $nr tags</pre>"; + } elsif ($nr == 0) { + $end = '<pre>no tags to show</pre>'; + } + $end = "</table>$end</body></html>"; + } + for (@lines) { + my ($ref, $date, $s) = split(' ', $_, 3); + ++$nr; + $ref =~ s!\Arefs/tags/!!; + $ref = PublicInbox::Hval->utf8($ref); + my $h = $ref->as_html; + $ref = $ref->as_href; + $dst .= qq(<tr><td><tt>) . + qq(<a\nhref="?h=$ref"><b>$h</b></a>) . + qq(</tt></td><td><tt>$date</tt></td><td><tt>) . + utf8_html($s) . 
'</tt></td></tr>'; + } + $dst .= $end; + } +} + +sub git_tag_list { + my ($self, $req) = @_; + my $git = $req->{repo_info}->{git}; + + # TODO: use Xapian so we can more easily handle offsets/limits + # for pagination instead of limiting + my $count = $req->{-tag_count} = 50; + my $cmd = $git->cmd(qw(for-each-ref --sort=-creatordate), + '--format=%(refname) %(creatordate:short) %(subject)', + "--count=$count", 'refs/tags/'); + my $rdr = { 2 => $git->err_begin }; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr); + my $env = $req->{env}; + $env->{'qspawn.quiet'} = 1; + $qsp->psgi_return($env, undef, sub { # parse output + my ($r) = @_; + if (!defined $r) { + my $errmsg = $git->err; + [ 500, [ 'Content-Type', 'text/html; charset=UTF-8'], + [ $errmsg ] ]; + } else { + $env->{'qspawn.filter'} = git_each_tag_sed($self, $req); + [ 200, [ 'Content-Type', 'text/html; charset=UTF-8' ]]; + } + }); +} + +sub unknown_tag_type { + my ($self, $fh, $req, $h, $type, $hex) = @_; + my $repo_info = $req->{repo_info}; + $h = $h->as_html; + my $rel = $req->{relcmd}; + my $label = "$type $hex"; + my $cmd = $cmd_map{$type} || 'show'; + my $obj_link = qq(<a\nhref="$rel$cmd?id=$hex">$label</a>\n); + + $fh->write($self->html_start($req, + "$repo_info->{repo}: ref: $h") . + "\n\n <b>$h</b> (lightweight tag)\nobject $obj_link\n"); +} + +sub creator_split { + my ($tagger) = @_; + $tagger =~ s/\s*(\d+)(?:\s+([\+\-])?([ \d]{1,2})(\d\d))\z// or + return ($tagger, 0); + my ($tz_sign, $tz_H, $tz_M) = ($2, $3, $4); + my $sec = $1; + my $off = $tz_H * 3600 + $tz_M * 60; + $off *= -1 if $tz_sign eq '-'; + my @time = gmtime($sec + $off); + my $time = strftime('%Y-%m-%d %H:%M:%S', @time)." 
$tz_sign$tz_H$tz_M"; + + (utf8_html($tagger), $time); +} + +1; diff --git a/lib/PublicInbox/RepoGitTree.pm b/lib/PublicInbox/RepoGitTree.pm new file mode 100644 index 00000000..4a68cf69 --- /dev/null +++ b/lib/PublicInbox/RepoGitTree.pm @@ -0,0 +1,220 @@ +# Copyright (C) 2015 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +package PublicInbox::RepoGitTree; +use strict; +use warnings; +use base qw(PublicInbox::RepoBase); +use PublicInbox::Hval qw(utf8_html); +use PublicInbox::Qspawn; + +my %GIT_MODE = ( + '100644' => ' ', # blob + '100755' => 'x', # executable blob + '040000' => 'd', # tree + '120000' => 'l', # symlink + '160000' => 'g', # commit (gitlink) +); + +my $BINARY_MSG = "Binary file, save using the 'raw' link above"; + +sub call_git_tree { + my ($self, $req) = @_; + my @extra = @{$req->{extra}}; + my $git = $req->{repo_info}->{git}; + my $q = PublicInbox::RepoGitQuery->new($req->{env}); + my $id = $q->{id}; + if ($id eq '') { + chomp($id = $git->qx(qw(rev-parse --short=10 HEAD))); + $q->{id} = $id; + } + + my $obj = "$id:$req->{expath}"; + my ($hex, $type, $size) = $git->check($obj); + + unless (defined($type)) { + return [404, ['Content-Type'=>'text/plain'], ['Not Found']]; + } + + my $opts = { nofollow => 1 }; + my $title = $req->{expath}; + $title = $title eq '' ? 'tree' : utf8_html($title); + if ($type eq 'tree') { + $opts->{noindex} = 1; + $req->{thtml} = $self->html_start($req, $title, $opts) . "\n"; + git_tree_show($req, $hex, $q); + } elsif ($type eq 'blob') { + sub { + my $res = $_[0]; + my $fh = $res->([200, + ['Content-Type','text/html; charset=UTF-8']]); + $fh->write($self->html_start($req, $title, $opts) . 
+ "\n"); + git_blob_show($req, $fh, $git, $hex, $q); + $fh->write('</body></html>'); + $fh->close; + } + } else { + [404, ['Content-Type', 'text/plain; charset=UTF-8'], + ["Unrecognized type ($type) for $obj\n"]]; + } +} + +sub cur_path { + my ($req, $q) = @_; + my $qs = $q->qs; + my @ex = @{$req->{extra}} or return '<b>root</b>'; + my $s; + + my $rel = $req->{relcmd}; + # avoid relative paths, here, we don't want to propagate + # trailing-slash URLs although we tolerate them + $s = "<a\nhref=\"${rel}tree$qs\">root</a>/"; + my $cur = pop @ex; + my @t; + $s .= join('/', (map { + push @t, $_; + my $e = PublicInbox::Hval->utf8($_, join('/', @t)); + my $ep = $e->as_path; + my $eh = $e->as_html; + "<a\nhref=\"${rel}tree/$ep$qs\">$eh</a>"; + } @ex), '<b>'.utf8_html($cur).'</b>'); +} + +sub git_blob_show { + my ($req, $fh, $git, $hex, $q) = @_; + # ref: buffer_is_binary in git.git + my $to_read = 8000; # git uses this size to detect binary files + my $text_p; + my $n = 0; + + my $rel = $req->{relcmd}; + my $plain = join('/', "${rel}plain", @{$req->{extra}}); + $plain = PublicInbox::Hval->utf8($plain)->as_path . $q->qs; + my $t = cur_path($req, $q); + my $h = qq{\npath: $t\n\nblob $hex}; + my $end = ''; + + $git->cat_file($hex, sub { + my ($cat, $left) = @_; # $$left == $size + $h .= qq{\t$$left bytes (<a\nhref="$plain">raw</a>)}; + $to_read = $$left if $to_read > $$left; + my $r = read($cat, my $buf, $to_read); + return unless defined($r) && $r > 0; + $$left -= $r; + + if (index($buf, "\0") >= 0) { + $fh->write("$h\n$BINARY_MSG</pre>"); + return; + } + $fh->write($h."</pre><hr/><table\nsummary=blob><tr><td><pre>"); + $text_p = 1; + + while (1) { + my @buf = split(/\r?\n/, $buf, -1); + $buf = pop @buf; # last line, careful... + foreach my $l (@buf) { + ++$n; + $fh->write("<a\nid=n$n>". utf8_html($l). + "</a>\n"); + } + # no trailing newline: + if ($$left == 0 && $buf ne '') { + ++$n; + $buf = utf8_html($buf); + $fh->write("<a\nid=n$n>". 
$buf ."</a>"); + $end = '<pre>\ No newline at end of file</pre>'; + last; + } + + last unless defined($buf); + + $to_read = $$left if $to_read > $$left; + my $off = length $buf; # last line from previous read + $r = read($cat, $buf, $to_read, $off); + return unless defined($r) && $r > 0; + $$left -= $r; + } + 0; + }); + + # line numbers go in a second column: + $fh->write('</pre></td><td><pre>'); + $fh->write(qq(<a\nhref="#n$_">$_</a>\n)) foreach (1..$n); + $fh->write("</pre></td></tr></table><hr />$end"); +} + +sub git_tree_sed ($) { + my ($req) = @_; + my @lines; + my $buf = ''; + my $qs = $req->{qs}; + my $pfx = $req->{tpfx}; + my $end; + sub { + my $dst = delete $req->{thtml} || ''; + if (defined $_[0]) { + @lines = split(/\0/, $buf .= $_[0]); + $buf = pop @lines if @lines; + } else { + @lines = split(/\0/, $buf); + $end = '</pre></body></html>'; + } + for (@lines) { + my ($m, $x, $s, $path) = + (/\A(\S+) \S+ (\S+)( *\S+)\t(.+)\z/s); + $m = $GIT_MODE{$m} or next; + $path = PublicInbox::Hval->utf8($path); + my $ref = $path->as_path; + $path = $path->as_html; + + if ($m eq 'g') { + # TODO: support cross-repository gitlinks + $dst .= 'g' . (' ' x 15) . "$path @ $x\n"; + next; + } + elsif ($m eq 'd') { $path = "$path/" } + elsif ($m eq 'x') { $path = "<b>$path</b>" } + elsif ($m eq 'l') { $path = "<i>$path</i>" } + $s =~ s/\s+//g; + + # 'plain' and 'log' links intentionally omitted + # for brevity and speed + $dst .= qq($m\t). 
+ qq($s\t<a\nhref="$pfx$ref$qs">$path</a>\n); + } + $dst; + } +} + +sub git_tree_show { + my ($req, $hex, $q) = @_; + my $git = $req->{repo_info}->{git}; + my $cmd = $git->cmd(qw(ls-tree -l -z), $git->abbrev, $hex); + my $rdr = { 2 => $git->err_begin }; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr); + my $t = cur_path($req, $q); + my $pfx; + + $req->{thtml} .= "\npath: $t\n\n<b>mode\tsize\tname</b>\n"; + $req->{qs} = $q->qs; + if ($req->{tslash}) { + $pfx = './'; + } elsif (defined(my $last = $req->{extra}->[-1])) { + $pfx = PublicInbox::Hval->utf8($last)->as_path . '/'; + } else { + $pfx = 'tree/'; + } + $req->{tpfx} = $pfx; + my $env = $req->{env}; + $qsp->psgi_return($env, undef, sub { + my ($r) = @_; + if (defined $r) { + $env->{'qspawn.filter'} = git_tree_sed($req); + [ 200, [ 'Content-Type', 'text/html' ] ]; + } else { + [ 500, [ 'Content-Type', 'text/plain' ], [ $git->err ]]; + } + }); +} + +1; diff --git a/lib/PublicInbox/RepoRoot.pm b/lib/PublicInbox/RepoRoot.pm new file mode 100644 index 00000000..c04c23c5 --- /dev/null +++ b/lib/PublicInbox/RepoRoot.pm @@ -0,0 +1,71 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# displays the root '/' where all the projects lie +package PublicInbox::RepoRoot; +use strict; +use warnings; +use base qw(PublicInbox::RepoBase); +use PublicInbox::Hval qw(utf8_html); + +sub call { + my ($self, $rconfig) = @_; + sub { + my ($res) = @_; # PSGI callback + my @h = ('Content-Type', 'text/html; charset=UTF-8'); + my $fh = $res->([200, \@h]); + repobrowse_index($fh, $rconfig); + $fh->close; + } +} + +sub repobrowse_index { + my ($fh, $rconfig) = @_; + my $title = 'repobrowse index'; + $fh->write("<html><head><title>$title</title>" . + PublicInbox::Hval::STYLE . 
+ "</head><body><pre><b>$title</b>"); + + # preload all groups + foreach my $k (sort keys %$rconfig) { + $k =~ /\Arepo\.(.+)\.path\z/ or next; + my $repo_path = $1; + $rconfig->lookup($repo_path); # insert into groups + } + + my $groups = $rconfig->{-groups}; + if (scalar(keys %$groups) > 2) { # default has '-none' + '-hidden' + $fh->write("\n\n<b>uncategorized</b></pre>". + "<table\nsummary=repoindex>"); + } else { + $fh->write("</pre><table\nsummary=repoindex>"); + } + foreach my $repo_path (sort @{$groups->{-none}}) { + my $r = $rconfig->lookup($repo_path); + my $p = PublicInbox::Hval->utf8($r->{repo}); + my $l = $p->as_html; + $p = $p->as_path; + $fh->write(qq(<tr><td><tt><a\nhref="$p">$l</a></tt></td>) . + '<td><tt> '.$r->desc_html.'</tt></td></tr>'); + } + + foreach my $group (keys %$groups) { + next if $group =~ /\A-(?:none|hidden)\z/; + my $g = utf8_html($group); + $fh->write("<tr><td><pre> </pre></td></tr>". + "<tr><td><pre><b>$g</b></pre></tr>"); + foreach my $repo_path (sort @{$groups->{$group}}) { + my $r = $rconfig->lookup($repo_path); + my $p = PublicInbox::Hval->utf8($r->{repo}); + my $l = $p->as_html; + $p = $p->as_path; + $fh->write('<tr><td><tt> ' . + qq(<a\nhref="$p">$l</a></tt></td>) . + '<td><tt> '.$r->desc_html.'</tt></td></tr>'); + } + } + + $fh->write('</table></body></html>'); +} + +1; diff --git a/lib/PublicInbox/Repobrowse.pm b/lib/PublicInbox/Repobrowse.pm new file mode 100644 index 00000000..87e12278 --- /dev/null +++ b/lib/PublicInbox/Repobrowse.pm @@ -0,0 +1,166 @@ +# Copyright (C) 2015 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# Version control system (VCS) repository viewer like cgit or gitweb, +# but with optional public-inbox archive integration. +# This uses cgit-compatible PATH_INFO URLs. 
+# This may be expanded to support other Free Software VCSes such as
+# Subversion and Mercurial, so not just git
+#
+# Same web design principles as PublicInbox::WWW for supporting the
+# lowest common denominators (see bottom of Documentation/design_www.txt)
+#
+# This allows an M:N relationship between "normal" repos for project
+# and public-inbox (ssoma) git repositories where N may be zero.
+# In other words, repobrowse must work for repositories without
+# any public-inbox at all; or with multiple public-inboxes.
+# And the rest of public-inbox will always work without a "normal"
+# code repo for the project.
+
+package PublicInbox::Repobrowse;
+use strict;
+use warnings;
+use URI::Escape qw(uri_escape_utf8 uri_unescape);
+use PublicInbox::RepoConfig;
+
+my %CMD = map { lc($_) => $_ } qw(Log Commit Tree Patch Blob Plain Tag Atom
+	Diff Snapshot);
+my %VCS = (git => 'Git');
+my %LOADED;
+
+sub new { # $rconfig is optional; a default PublicInbox::RepoConfig is created when omitted
+	my ($class, $rconfig) = @_;
+	$rconfig ||= PublicInbox::RepoConfig->new;
+	bless { rconfig => $rconfig }, $class;
+}
+
+# simple response for errors
+sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] }
+
+sub base_url ($) { # builds "scheme://host[:port]SCRIPT_NAME" from the PSGI env
+	my ($env) = @_;
+	my $scheme = $env->{'psgi.url_scheme'} || 'http';
+	my $host = $env->{HTTP_HOST};
+	my $base = "$scheme://";
+	if (defined $host) {
+		$base .= $host;
+	} else {
+		$base .= $env->{SERVER_NAME};
+		my $port = $env->{SERVER_PORT} || ($scheme eq 'https' ? 443 : 80); # default per scheme, so https never gains ":80"
+		if (($scheme eq 'http' && $port != 80) ||
+				($scheme eq 'https' && $port != 443)) {
+			$base.= ":$port";
+		}
+	}
+	$base .= $env->{SCRIPT_NAME};
+}
+
+# Remove trailing slash in URLs which regular humans are likely to read
+# in an attempt to improve cache hit ratios.  Do not redirect
+# plain|patch|blob|fallback endpoints since those could be using
+# automated tools which may not follow redirects automatically
+# (e.g. curl does not follow 301 unless given "-L")
+my %NO_TSLASH = map { $_ => 1 } qw(Log Commit Tree Summary Tag);
+sub no_tslash { # returns a 301 response to the same URL with trailing slashes removed
+	my ($env) = @_;
+	my $base = base_url($env);
+	my $uri = $env->{REQUEST_URI};
+	my $qs = '';
+	if ($uri =~ s/(\?.+)\z//) { # set the query string aside so only the path is trimmed
+		$qs = $1;
+	}
+	if ($uri !~ s!/+\z!!) {
+		warn "W: buggy redirect? base=$base request_uri=$uri\n";
+	}
+	my $url = $base . $uri . $qs;
+	[ 301,
+	  [ Location => $url, 'Content-Type' => 'text/plain' ],
+	  [ "Redirecting to $url\n" ] ]
+}
+
+sub root_index { # serves '/' by delegating to PublicInbox::RepoRoot
+	my ($self) = @_;
+	my $mod = load_once('PublicInbox::RepoRoot');
+	$mod->new->call($self->{rconfig}); # RepoRoot::call
+}
+
+sub call { # PSGI entry point: maps PATH_INFO to a repo, a command module and extra path components
+	my ($self, $env) = @_;
+	my $method = $env->{REQUEST_METHOD}; # group the alternation below: unanchored, "GETX" or "XPOST" would pass
+	return r(405, 'Method Not Allowed') if ($method !~ /\A(?:GET|HEAD|POST)\z/);
+
+	# URL syntax: / repo [ / cmd [ / path ] ]
+	# cmd: log | commit | diff | tree | view | blob | snapshot
+	# repo and path (@extra) may both contain '/'
+	my $path_info = uri_unescape($env->{PATH_INFO});
+	my (undef, $repo_path, @extra) = split(m{/+}, $path_info, -1);
+
+	return $self->root_index unless length($repo_path);
+
+	my $rconfig = $self->{rconfig}; # RepoConfig
+	my $repo_info;
+	until ($repo_info = $rconfig->lookup($repo_path)) { # repo names may contain '/': extend until one matches
+		my $p = shift @extra or last;
+		$repo_path .= "/$p";
+	}
+	return r404() unless $repo_info;
+
+	my $req = {
+		repo_info => $repo_info,
+		extra => \@extra, # path
+		rconfig => $rconfig,
+		env => $env,
+	};
+	my $tslash = 0;
+	my $cmd = shift @extra;
+	my $vcs_lc = $repo_info->{vcs};
+	my $vcs = $VCS{$vcs_lc} or return r404();
+	my $mod;
+	if (defined $cmd && length $cmd) {
+		$mod = $CMD{$cmd};
+		unless ($mod) { # not a known command: treat it as part of the path
+			unshift @extra, $cmd;
+			$mod = 'Fallback';
+		}
+		$req->{relcmd} = '../' x scalar(@extra);
+	} else {
+		$mod = 'Summary';
+		$cmd = 'summary';
+		if ($path_info =~ m!/\z!) {
+			$tslash = $path_info =~ tr!/!!;
+		} else {
+			my @x = split('/', $repo_path);
+			$req->{relcmd} = @x > 1 ? "./$x[-1]/" : "/$x[-1]/";
+		}
+	}
+	while (@extra && $extra[-1] eq '') { # empty trailing components come from trailing slashes
+		pop @extra;
+		++$tslash;
+	}
+
+	return no_tslash($env) if ($tslash && $NO_TSLASH{$mod});
+
+	$req->{tslash} = $tslash;
+	$mod = load_once("PublicInbox::Repo$vcs$mod");
+	$vcs = load_once("PublicInbox::$vcs");
+
+	# $repo_info->{git} ||= PublicInbox::Git->new(...)
+	$repo_info->{$vcs_lc} ||= $vcs->new($repo_info->{path});
+
+	$req->{expath} = join('/', @extra);
+	my $rv = eval { $mod->new->call($cmd, $req) }; # RepoBase::call
+	$rv || r404(); # NOTE(review): an exception inside the command also ends up here as a 404, unlogged
+}
+
+sub r404 { r(404, 'Not Found') }
+
+sub load_once { # requires $mod once and returns its name; a failed require is retried on the next call
+	my ($mod) = @_;
+
+	return $mod if $LOADED{$mod};
+	eval "require $mod";
+	$LOADED{$mod} = 1 unless $@;
+	$mod;
+}
+
+1;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 8c72fa17..02d1827e 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -55,8 +55,6 @@ my %bool_pfx_internal = (
 );
 
 my %bool_pfx_external = (
-	# do we still need these? probably not..
-	path => 'XPATH',
 	mid => 'Q', # uniQue id (Message-ID)
 );
 
@@ -106,11 +104,7 @@ chomp @HELP;
 # da (diff a/ removed lines)
 # db (diff b/ added lines)
 
-my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
-
-sub xpfx { $all_pfx{$_[0]} }
-
-my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
+my $mail_query = Search::Xapian::Query->new('T' . 'mail');
 
 sub xdir {
 	my (undef, $git_dir) = @_;
@@ -145,11 +139,11 @@ sub get_thread {
 	my $smsg = eval { $self->lookup_message($mid) };
 
 	return { total => 0, msgs => [] } unless $smsg;
-	my $qtid = Search::Xapian::Query->new(xpfx('thread').$smsg->thread_id);
+	my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id);
 	my $path = $smsg->path;
 	if (defined $path && $path ne '') {
 		my $path = id_compress($smsg->path);
-		my $qsub = Search::Xapian::Query->new(xpfx('path').$path);
+		my $qsub = Search::Xapian::Query->new('XPATH' . 
$path); $qtid = Search::Xapian::Query->new(OP_OR, $qtid, $qsub); } $opts ||= {}; @@ -278,7 +272,7 @@ sub lookup_message { my ($self, $mid) = @_; $mid = mid_clean($mid); - my $doc_id = $self->find_unique_doc_id('mid', $mid); + my $doc_id = $self->find_unique_doc_id('Q' . $mid); my $smsg; if (defined $doc_id) { # raises on error: @@ -298,9 +292,9 @@ sub lookup_mail { # no ghosts! } sub find_unique_doc_id { - my ($self, $term, $value) = @_; + my ($self, $termval) = @_; - my ($begin, $end) = $self->find_doc_ids($term, $value); + my ($begin, $end) = $self->find_doc_ids($termval); return undef if $begin->equal($end); # not found @@ -308,23 +302,16 @@ sub find_unique_doc_id { # sanity check $begin->inc; - $begin->equal($end) or die "Term '$term:$value' is not unique\n"; + $begin->equal($end) or die "Term '$termval' is not unique\n"; $rv; } # returns begin and end PostingIterator sub find_doc_ids { - my ($self, $term, $value) = @_; - - $self->find_doc_ids_for_term(xpfx($term) . $value); -} - -# returns begin and end PostingIterator -sub find_doc_ids_for_term { - my ($self, $term) = @_; + my ($self, $termval) = @_; my $db = $self->{xdb}; - ($db->postlist_begin($term), $db->postlist_end($term)); + ($db->postlist_begin($termval), $db->postlist_end($termval)); } # normalize subjects so they are suitable as pathnames for URLs diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 8a529c66..8200b54c 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -16,18 +16,14 @@ $Email::MIME::ContentType::STRICT_PARAMS = 0; use base qw(PublicInbox::Search); use PublicInbox::MID qw/mid_clean id_compress mid_mime/; use PublicInbox::MsgIter; +use PublicInbox::GitIdx; use Carp qw(croak); use POSIX qw(strftime); require PublicInbox::Git; -*xpfx = *PublicInbox::Search::xpfx; -use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian use constant { - PERM_UMASK => 0, - OLD_PERM_GROUP => 1, - OLD_PERM_EVERYBODY => 2, - PERM_GROUP => 0660, - 
PERM_EVERYBODY => 0664, + MAX_MID_SIZE => 244, # max term size - 1 in Xapian + BATCH_BYTES => 1_000_000, }; sub new { @@ -46,11 +42,10 @@ sub new { } require Search::Xapian::WritableDatabase; my $self = bless { git_dir => $git_dir, -altid => $altid }, $class; - my $perm = $self->_git_config_perm; - my $umask = _umask_for($perm); - $self->{umask} = $umask; + my $git = $self->{git} = PublicInbox::Git->new($git_dir); + my $umask = git_umask_for($git); + $self->{'umask'} = $umask; $self->{lock_path} = "$git_dir/ssoma.lock"; - $self->{git} = PublicInbox::Git->new($git_dir); $self->{creat} = ($creat || 0) == 1; $self; } @@ -72,7 +67,6 @@ sub _xdb_acquire { require File::Path; _lock_acquire($self); File::Path::mkpath($dir); - $self->{batch_size} = 100; $flag = Search::Xapian::DB_CREATE_OR_OPEN; } $self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag); @@ -160,12 +154,12 @@ sub add_message { } $smsg = PublicInbox::SearchMsg->new($mime); my $doc = $smsg->{doc}; - $doc->add_term(xpfx('mid') . $mid); + $doc->add_term('Q' . $mid); my $subj = $smsg->subject; if ($subj ne '') { my $path = $self->subject_path($subj); - $doc->add_term(xpfx('path') . id_compress($path)); + $doc->add_term('XPATH' . id_compress($path)); } add_values($smsg, $bytes, $num); @@ -332,7 +326,7 @@ sub link_message { } else { $tid = $self->next_thread_id; } - $doc->add_term(xpfx('thread') . $tid); + $doc->add_term('G' . 
$tid); } sub index_blob { @@ -393,7 +387,16 @@ sub do_cat_mail { sub index_sync { my ($self, $opts) = @_; - with_umask($self, sub { $self->_index_sync($opts) }); + with_umask($self->{'umask'}, sub { $self->_index_sync($opts) }); +} + +sub batch_adjust ($$$$) { + my ($max, $bytes, $batch_cb, $latest) = @_; + $$max -= $bytes; + if ($$max <= 0) { + $$max = BATCH_BYTES; + $batch_cb->($latest, 1); + } } sub rlog { @@ -405,23 +408,21 @@ sub rlog { my $git = $self->{git}; my $latest; my $bytes; - my $max = $self->{batch_size}; # may be undef + my $max = BATCH_BYTES; local $/ = "\n"; my $line; while (defined($line = <$log>)) { if ($line =~ /$addmsg/o) { my $blob = $1; my $mime = do_cat_mail($git, $blob, \$bytes) or next; + batch_adjust(\$max, $bytes, $batch_cb, $latest); $add_cb->($self, $mime, $bytes, $blob); } elsif ($line =~ /$delmsg/o) { my $blob = $1; - my $mime = do_cat_mail($git, $blob) or next; + my $mime = do_cat_mail($git, $blob, \$bytes) or next; + batch_adjust(\$max, $bytes, $batch_cb, $latest); $del_cb->($self, $mime); } elsif ($line =~ /^commit ($h40)/o) { - if (defined $max && --$max <= 0) { - $max = $self->{batch_size}; - $batch_cb->($latest, 1); - } $latest = $1; } } @@ -542,9 +543,9 @@ sub create_ghost { my $tid = $self->next_thread_id; my $doc = Search::Xapian::Document->new; - $doc->add_term(xpfx('mid') . $mid); - $doc->add_term(xpfx('thread') . $tid); - $doc->add_term(xpfx('type') . 'ghost'); + $doc->add_term('Q' . $mid); + $doc->add_term('G' . $tid); + $doc->add_term('T' . 'ghost'); my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); $self->{xdb}->add_document($doc); @@ -555,75 +556,18 @@ sub create_ghost { sub merge_threads { my ($self, $winner_tid, $loser_tid) = @_; return if $winner_tid == $loser_tid; - my ($head, $tail) = $self->find_doc_ids('thread', $loser_tid); - my $thread_pfx = xpfx('thread'); + my ($head, $tail) = $self->find_doc_ids('G' . 
$loser_tid); my $db = $self->{xdb}; for (; $head != $tail; $head->inc) { my $docid = $head->get_docid; my $doc = $db->get_document($docid); - $doc->remove_term($thread_pfx . $loser_tid); - $doc->add_term($thread_pfx . $winner_tid); + $doc->remove_term('G' . $loser_tid); + $doc->add_term('G' . $winner_tid); $db->replace_document($docid, $doc); } } -sub _read_git_config_perm { - my ($self) = @_; - my @cmd = qw(config core.sharedRepository); - my $fh = PublicInbox::Git->new($self->{git_dir})->popen(@cmd); - local $/ = "\n"; - my $perm = <$fh>; - chomp $perm if defined $perm; - $perm; -} - -sub _git_config_perm { - my $self = shift; - my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self); - return PERM_GROUP if (!defined($perm) || $perm eq ''); - return PERM_UMASK if ($perm eq 'umask'); - return PERM_GROUP if ($perm eq 'group'); - if ($perm =~ /\A(?:all|world|everybody)\z/) { - return PERM_EVERYBODY; - } - return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/); - return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/); - - my $i = oct($perm); - return PERM_UMASK if ($i == PERM_UMASK); - return PERM_GROUP if ($i == OLD_PERM_GROUP); - return PERM_EVERYBODY if ($i == OLD_PERM_EVERYBODY); - - if (($i & 0600) != 0600) { - die "core.sharedRepository mode invalid: ". - sprintf('%.3o', $i) . 
"\nOwner must have permissions\n"; - } - ($i & 0666); -} - -sub _umask_for { - my ($perm) = @_; # _git_config_perm return value - my $rv = $perm; - return umask if $rv == 0; - - # set +x bit if +r or +w were set - $rv |= 0100 if ($rv & 0600); - $rv |= 0010 if ($rv & 0060); - $rv |= 0001 if ($rv & 0006); - (~$rv & 0777); -} - -sub with_umask { - my ($self, $cb) = @_; - my $old = umask $self->{umask}; - my $rv = eval { $cb->() }; - my $err = $@; - umask $old; - die $err if $@; - $rv; -} - sub DESTROY { # order matters for unlocking $_[0]->{xdb} = undef; diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index b8eee665..a19d45db 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -14,7 +14,7 @@ use PublicInbox::Address; sub new { my ($class, $mime) = @_; my $doc = Search::Xapian::Document->new; - $doc->add_term(PublicInbox::Search::xpfx('type') . 'mail'); + $doc->add_term('T' . 'mail'); bless { type => 'mail', doc => $doc, mime => $mime }, $class; } diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm index 41b08a33..e543be54 100644 --- a/lib/PublicInbox/Spawn.pm +++ b/lib/PublicInbox/Spawn.pm @@ -190,8 +190,6 @@ sub popen_rd { my ($cmd, $env, $opts) = @_; pipe(my ($r, $w)) or die "pipe: $!\n"; $opts ||= {}; - my $blocking = $opts->{Blocking}; - IO::Handle::blocking($r, $blocking) if defined $blocking; $opts->{1} = fileno($w); my $pid = spawn($cmd, $env, $opts); return unless defined $pid; |