diff options
-rw-r--r-- | Documentation/.gitignore | 1 | ||||
-rw-r--r-- | Documentation/include.mk | 6 | ||||
-rw-r--r-- | Documentation/public-inbox-config.pod | 9 | ||||
-rwxr-xr-x | Documentation/standards.perl | 77 | ||||
-rw-r--r-- | MANIFEST | 2 | ||||
-rw-r--r-- | examples/cgit-commit-filter.lua | 10 | ||||
-rw-r--r-- | examples/cgit-wwwhighlight-filter.lua | 105 | ||||
-rw-r--r-- | lib/PublicInbox/Cgit.pm | 16 | ||||
-rw-r--r-- | lib/PublicInbox/ExtMsg.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/View.pm | 3 | ||||
-rw-r--r-- | lib/PublicInbox/ViewDiff.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/WwwHighlight.pm | 11 | ||||
-rw-r--r-- | lib/PublicInbox/WwwStream.pm | 5 | ||||
-rw-r--r-- | t/search.t | 12 |
14 files changed, 247 insertions, 16 deletions
diff --git a/Documentation/.gitignore b/Documentation/.gitignore index 107ad36f..e78a0d33 100644 --- a/Documentation/.gitignore +++ b/Documentation/.gitignore @@ -1 +1,2 @@ /public-inbox-*.txt +/standards.txt diff --git a/Documentation/include.mk b/Documentation/include.mk index 28fa7574..02cbef30 100644 --- a/Documentation/include.mk +++ b/Documentation/include.mk @@ -81,8 +81,12 @@ txt2pre = $(PERL) -I lib ./Documentation/txt2pre <$< >$@+ && \ touch -r $< $@+ && mv $@+ $@ txt := INSTALL README COPYING TODO dtxt := design_notes.txt design_www.txt dc-dlvr-spam-flow.txt hosted.txt +dtxt += standards.txt dtxt := $(addprefix Documentation/, $(dtxt)) $(mantxt) +Documentation/standards.txt : Documentation/standards.perl + $(PERL) $< >$@+ && mv $@+ $@ + %.html: %.txt TITLE="$(basename $(<F))" $(txt2pre) %.html: % @@ -91,7 +95,7 @@ dtxt := $(addprefix Documentation/, $(dtxt)) $(mantxt) docs_html := $(addsuffix .html, $(subst .txt,,$(dtxt)) $(txt)) html: $(docs_html) gz_docs := $(addsuffix .gz, $(docs) $(docs_html)) -rsync_docs := $(gz_docs) $(docs) $(txt) $(docs_html) +rsync_docs := $(gz_docs) $(docs) $(txt) $(docs_html) $(dtxt) %.gz: % gzip -9 --rsyncable <$< >$@+ touch -r $< $@+ diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod index 17b8bac7..d44c8f30 100644 --- a/Documentation/public-inbox-config.pod +++ b/Documentation/public-inbox-config.pod @@ -225,6 +225,15 @@ directive is configured. Default: /var/www/htdocs/cgit/cgit.cgi or /usr/lib/cgit/cgit.cgi +=item publicinbox.cgitdata + +A path to the data directory used by cgit for storing static files. +Typically guessed based on the location of C<cgit.cgi> (from +C<publicinbox.cgitbin>), but may be overridden. + +Default: directory containing C<publicinbox.cgitbin>, /var/www/htdocs/cgit/ +or /usr/share/cgit/ + =item publicinbox.wwwlisting Enable a HTML listing style when the root path of the URL '/' is accessed. 
diff --git a/Documentation/standards.perl b/Documentation/standards.perl new file mode 100755 index 00000000..f75c4122 --- /dev/null +++ b/Documentation/standards.perl @@ -0,0 +1,77 @@ +#!/usr/bin/perl -w +use strict; +# Copyright 2019 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +print <<EOF; +Relevant standards for public-inbox users and hackers +----------------------------------------------------- + +Non-exhaustive list of standards public-inbox software attempts or +intends to implement. This list is intended to be a quick reference +for hackers and users. + +Given the goals of interoperability and accessibility; strict +conformance to standards is not always possible, but rather +best-effort taking into account real-world cases. In particular, +"obsolete" standards remain relevant as long as clients and +data exists. + +IETF RFCs +--------- + +EOF + +my $rfcs = [ + 3977 => 'NNTP', + 977 => 'NNTP (old)', + 6048 => 'NNTP additions to LIST command (TODO)', + 8054 => 'NNTP compression (TODO)', + 4642 => 'NNTP TLS (TODO)', + 8143 => 'NNTP TLS (TODO)', + 2980 => 'NNTP extensions (obsolete, but NOT irrelevant)', + 4287 => 'Atom syndication', + 4685 => 'Atom threading extensions', + 2919 => 'List-Id mail header', + 5064 => 'Archived-At mail header', + 3986 => 'URI escaping', + 1521 => 'MIME extensions', + 2616 => 'HTTP/1.1 (newer updates should apply, too)', + 7230 => 'HTTP/1.1 message syntax and routing', + 7231 => 'HTTP/1.1 semantics and content', + 2822 => 'Internet message format', + # TODO: flesh this out + +]; + +my @rfc_urls = qw(tools.ietf.org/html/rfc%d + www.rfc-editor.org/errata_search.php?rfc=%d); + +for (my $i = 0; $i < $#$rfcs;) { + my $num = $rfcs->[$i++]; + my $txt = $rfcs->[$i++]; + print "rfc$num\t- $txt\n"; + + printf "\thttps://$_\n", $num foreach @rfc_urls; + print "\n"; +} + +print <<'EOF' +Other relevant documentation +---------------------------- + +* 
Documentation/technical/http-protocol.txt in git source code: + https://public-inbox.org/git/9c5b6f0fac/s + +* Various mbox formats (we currently emit and parse mboxrd) + https://en.wikipedia.org/wiki/Mbox + +* PSGI/Plack specifications (as long as our web frontend uses Perl5) + git clone https://github.com/plack/psgi-specs.git + +Copyright +--------- + +Copyright 2019 all contributors <meta@public-inbox.org> +License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +EOF @@ -19,6 +19,7 @@ Documentation/public-inbox-overview.pod Documentation/public-inbox-v1-format.pod Documentation/public-inbox-v2-format.pod Documentation/public-inbox-watch.pod +Documentation/standards.perl Documentation/txt2pre HACKING INSTALL @@ -38,6 +39,7 @@ examples/apache2_perl.conf examples/apache2_perl_old.conf examples/cgi-webrick.rb examples/cgit-commit-filter.lua +examples/cgit-wwwhighlight-filter.lua examples/cgit.psgi examples/highlight.psgi examples/logrotate.conf diff --git a/examples/cgit-commit-filter.lua b/examples/cgit-commit-filter.lua index 7799befa..16772534 100644 --- a/examples/cgit-commit-filter.lua +++ b/examples/cgit-commit-filter.lua @@ -13,14 +13,19 @@ local urls = {} urls['public-inbox.git'] = 'https://public-inbox.org/meta/' -- additional URLs here... +-- TODO we should be able to auto-generate this based on "coderepo" +-- directives in the public-inbox config file; but keep in mind +-- the mapping is M:N between inboxes and coderepos function filter_open(...) 
lineno = 0 buffer = "" - subject = "" end function filter_close() + -- cgit opens and closes this filter for the commit subject + -- and body separately, and we only generate the link based + -- on the commit subject: if lineno == 1 and string.find(buffer, "\n") == nil then u = urls[os.getenv('CGIT_REPO_URL')] if u == nil then @@ -33,6 +38,9 @@ function filter_close() html('</tt></a>') end else + -- pass the body-through as-is + -- TODO: optionally use WwwHighlight for linkification like + -- cgit-wwwhighlight-filter.lua html(buffer) end return 0 diff --git a/examples/cgit-wwwhighlight-filter.lua b/examples/cgit-wwwhighlight-filter.lua new file mode 100644 index 00000000..a267d1c8 --- /dev/null +++ b/examples/cgit-wwwhighlight-filter.lua @@ -0,0 +1,105 @@ +-- Copyright (C) 2019 all contributors <meta@public-inbox.org> +-- License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt> +-- +-- This filter accesses the PublicInbox::WwwHighlight PSGI endpoint +-- (see examples/highlight.psgi) +-- +-- Dependencies: lua-http +-- +-- disclaimer: written by someone who does not know Lua. +-- +-- This requires cgit linked with Lua +-- Usage (in your cgitrc(5) config file): +-- +-- source-filter=lua:/path/to/this/script.lua +-- about-filter=lua:/path/to/this/script.lua +-- +local wwwhighlight_url = 'http://127.0.0.1:9090/' +local req_timeout = 10 +local too_big = false + +-- match $PublicInbox::HTTP::MAX_REQUEST_BUFFER +local max_len = 10 * 1024 * 1024 + +-- about-filter needs surrounding <pre> tags if all we do is +-- highlight and linkify +local pre = true + +function filter_open(...) + req_body = "" + + -- detect when we're used in an about-filter + local repo_url = os.getenv('CGIT_REPO_URL') + if repo_url then + local path_info = os.getenv('PATH_INFO') + rurl = path_info:match("^/(.+)/about/?$") + pre = rurl == repo_url + end + + -- hand filename off for language detection + local fn = select(1, ...) 
+ if fn then + local http_util = require 'http.util' + wwwhighlight_url = wwwhighlight_url .. http_util.encodeURI(fn) + end +end + +-- try to buffer the entire source in memory +function filter_write(str) + if too_big then + html(str) + elseif (req_body:len() + str:len()) > max_len then + too_big = true + req_body = "" + html(req_body) + html(str) + else + req_body = req_body .. str + end +end + +function fail(err) + io.stderr:write(tostring(err), "\n") + if pre then + html("<pre>") + end + html_txt(req_body) + if pre then + html("</pre>") + end + return 1 +end + +function filter_close() + if too_big then + return 0 + end + local request = require 'http.request' + local req = request.new_from_uri(wwwhighlight_url) + req.headers:upsert(':method', 'PUT') + req:set_body(req_body) + + -- don't wait for 100-Continue message from the PSGI app + req.headers:delete('expect') + + local headers, stream = req:go(req_timeout) + if headers == nil then + return fail(stream) + end + local status = headers:get(':status') + if status ~= '200' then + return fail('status ' .. status) + end + local body, err = stream:get_body_as_string() + if not body and err then + return fail(err) + end + if pre then + html("<pre>") + end + html(body) + if pre then + html("</pre>") + end + return 0 +end diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm index 8922ec56..353f4162 100644 --- a/lib/PublicInbox/Cgit.pm +++ b/lib/PublicInbox/Cgit.pm @@ -35,7 +35,15 @@ sub locate_cgit ($) { } } unless (defined $cgit_data) { - foreach my $d (qw(/var/www/htdocs/cgit /usr/share/cgit)) { + my @dirs = qw(/var/www/htdocs/cgit /usr/share/cgit); + + # local installs of cgit from source have + # CGIT_SCRIPT_PATH==CGIT_DATA_PATH by default, + # so we can usually infer the cgit_data path from cgit_bin + if (defined($cgit_bin) && $cgit_bin =~ m!\A(.+?)/[^/]+\z!) 
{ + unshift @dirs, $1 if -d $1; + } + foreach my $d (@dirs) { my $f = "$d/cgit.css"; next unless -f $f; $cgit_data = $d; @@ -90,6 +98,7 @@ my @PASS_ENV = qw( sub call { my ($self, $env) = @_; my $path_info = $env->{PATH_INFO}; + my $cgit_data; # handle requests without spawning cgit iff possible: if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!ox) { @@ -97,10 +106,11 @@ sub call { if (my $git = $self->{"\0$nick"}) { return serve($env, $git, $path); } - } elsif ($path_info =~ m!$self->{static}!) { + } elsif ($path_info =~ m!$self->{static}! && + defined($cgit_data = $self->{cgit_data})) { my $f = $1; my $type = Plack::MIME->mime_type($f); - return static_result($env, [], "$self->{cgit_data}$f", $type); + return static_result($env, [], $cgit_data.$f, $type); } my $cgi_env = { PATH_INFO => $path_info }; diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 14d49cc5..d07d5a79 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -8,13 +8,13 @@ package PublicInbox::ExtMsg; use strict; use warnings; -use PublicInbox::Hval; +use PublicInbox::Hval qw/ascii_html/; use PublicInbox::MID qw/mid2path/; use PublicInbox::WwwStream; our $MIN_PARTIAL_LEN = 16; # TODO: user-configurable -our @EXT_URL = ( +our @EXT_URL = map { ascii_html($_) } ( # leading "//" denotes protocol-relative (http:// or https://) '//marc.info/?i=%s', '//www.mail-archive.com/search?l=mid&q=%s', diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 62bdf0a1..47a2046e 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -458,7 +458,8 @@ sub thread_html { $ctx->{prev_level} = 0; $ctx->{root_anchor} = anchor_for($mid); $ctx->{mapping} = {}; - $ctx->{s_nr} = "$nr+ messages in thread"; + $ctx->{s_nr} = ($nr > 1 ? "$nr+ messages" : 'only message') + .' 
in thread'; my $rootset = thread_results($ctx, $msgs); diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index 0cce952d..6b8d9437 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -146,7 +146,7 @@ sub flush_diff ($$$) { if ($s =~ /^---$/) { to_state($dst, $state, DSTATE_STAT); $$dst .= $s; - } elsif ($s =~ /^ /) { + } elsif ($s =~ /^ / || ($s =~ /^$/ && $state >= DSTATE_CTX)) { # works for common cases, but not weird/long filenames if ($state == DSTATE_STAT && $s =~ /^ (.+)( +\| .*\z)/s) { diff --git a/lib/PublicInbox/WwwHighlight.pm b/lib/PublicInbox/WwwHighlight.pm index 01916401..bc349f8a 100644 --- a/lib/PublicInbox/WwwHighlight.pm +++ b/lib/PublicInbox/WwwHighlight.pm @@ -24,6 +24,8 @@ use warnings; use bytes (); # only for bytes::length use HTTP::Status qw(status_message); use parent qw(PublicInbox::HlMod); +use PublicInbox::Linkify qw(); +use PublicInbox::Hval qw(ascii_html); # TODO: support highlight(1) for distros which don't package the # SWIG extension. 
Also, there may be admins who don't want to @@ -64,7 +66,14 @@ sub call { return r(405) if $req_method ne 'PUT'; my $bref = read_in_full($env) or return r(500); - $bref = $self->do_hl($bref, $env->{PATH_INFO}); + my $l = PublicInbox::Linkify->new; + $l->linkify_1($$bref); + if (my $res = $self->do_hl($bref, $env->{PATH_INFO})) { + $bref = $res; + } else { + $$bref = ascii_html($$bref); + } + $l->linkify_2($$bref); my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ]; push @$h, 'Content-Length', bytes::length($$bref); diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index c708c21f..2893138d 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -12,7 +12,6 @@ use warnings; use PublicInbox::Hval qw(ascii_html); use URI; our $TOR_URL = 'https://www.torproject.org/'; -our $TOR2WEB_URL = 'https://www.tor2web.org/'; our $CODE_URL = 'https://public-inbox.org/'; our $PROJECT = 'public-inbox'; @@ -140,10 +139,6 @@ EOF if ($urls =~ m!\b[^:]+://\w+\.onion/!) { $urls .= "\n note: .onion URLs require Tor: "; $urls .= qq[<a\nhref="$TOR_URL">$TOR_URL</a>]; - if ($TOR2WEB_URL) { - $urls .= "\n or Tor2web: "; - $urls .= qq[<a\nhref="$TOR2WEB_URL">$TOR2WEB_URL</a>]; - } } '<hr><pre>'.join("\n\n", $desc, @@ -430,13 +430,23 @@ $ibx->with_umask(sub { is($ro->lookup_article($art->{num}), undef, 'gone from OVER DB') if defined($art); }); +my $all_mask = 07777; +my $dir_mask = 02770; + +# FreeBSD does not allow non-root users to set S_ISGID, so +# git doesn't set it, either (see DIR_HAS_BSD_GROUP_SEMANTICS in git.git) +if ($^O =~ /freebsd/i) { + $all_mask = 0777; + $dir_mask = 0770; +} + foreach my $f ("$git_dir/public-inbox/msgmap.sqlite3", "$git_dir/public-inbox", glob("$git_dir/public-inbox/xapian*/"), glob("$git_dir/public-inbox/xapian*/*")) { my @st = stat($f); my ($bn) = (split(m!/!, $f))[-1]; - is($st[2] & 07777, -f _ ? 0660 : 02770, + is($st[2] & $all_mask, -f _ ? 0660 : $dir_mask, "sharedRepository respected for $bn"); } |