diff options
-rw-r--r-- | MANIFEST | 26 | ||||
-rw-r--r-- | examples/public-inbox.psgi | 12 | ||||
-rw-r--r-- | lib/PublicInbox/WWW.pm | 208 | ||||
-rwxr-xr-x | public-inbox.cgi | 254 | ||||
-rw-r--r-- | t/plack.t | 112 |
5 files changed, 374 insertions, 238 deletions
@@ -1,31 +1,53 @@ .gitignore COPYING +Documentation/dc-dlvr-spam-flow.txt Documentation/design_notes.txt +Documentation/design_www.txt +Documentation/include.mk +Documentation/public-inbox-mda.txt +Documentation/txt2pre +HACKING INSTALL MANIFEST Makefile.PL README +TODO +examples/README +examples/apache2_cgi.conf +examples/apache2_perl.conf +examples/cgi-webrick.rb examples/public-inbox-config -lib/PublicInbox/MDA.pm +examples/public-inbox.psgi lib/PublicInbox/Config.pm lib/PublicInbox/Feed.pm lib/PublicInbox/Filter.pm lib/PublicInbox/GitCatFile.pm lib/PublicInbox/Hval.pm +lib/PublicInbox/MDA.pm lib/PublicInbox/View.pm +lib/PublicInbox/WWW.pm +public-inbox-learn public-inbox-mda +public-inbox.cgi sa_config/Makefile sa_config/README sa_config/root/etc/spamassassin/public-inbox.pre sa_config/user/.spamassassin/user_prefs scripts/dc-dlvr -scripts/import_gmane_spool +scripts/dc-dlvr.pre +scripts/edit-sa-prefs +scripts/import_maildir scripts/report-spam +scripts/slrnspool2maildir +t/cgi.t t/config.t t/fail-bin/spamc t/feed.t t/filter.t +t/html_index.t t/main-bin/spamc t/mda.t +t/plack.t t/precheck.t +t/utf8.mbox t/view.t diff --git a/examples/public-inbox.psgi b/examples/public-inbox.psgi new file mode 100644 index 00000000..6d8fd24c --- /dev/null +++ b/examples/public-inbox.psgi @@ -0,0 +1,12 @@ +#!/usr/bin/perl -w +# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors +# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# Note: this is part of our test suite, update t/plack.t if this changes +use strict; +use warnings; +require PublicInbox::WWW; +require Plack::Request; +sub { + my $req = Plack::Request->new(@_); + PublicInbox::WWW::run($req, $req->method); +}; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm new file mode 100644 index 00000000..6d9550b4 --- /dev/null +++ b/lib/PublicInbox/WWW.pm @@ -0,0 +1,208 @@ +# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors +# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# +# We focus on the lowest common denominators here: +# - targeted at text-only console browsers (lynx, w3m, etc..) +# - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs +# - No JavaScript, graphics or icons allowed. +# - Must not rely on static content +# - UTF-8 is only for user-content, 7-bit US-ASCII for us +package PublicInbox::WWW; +use 5.008; +use strict; +use warnings; +use PublicInbox::Config; +use URI::Escape qw(uri_escape_utf8 uri_unescape); +our $LISTNAME_RE = qr!\A/([\w\.\-]+)!; +our $pi_config; +BEGIN { + $pi_config = PublicInbox::Config->new; +} + +sub run { + my ($cgi, $method) = @_; + my %ctx; + if ($method !~ /\AGET|HEAD\z/) { + return r(405, 'Method Not Allowed'); + } + my $path_info = $cgi->path_info; + + # top-level indices and feeds + if ($path_info eq '/') { + r404(); + } elsif ($path_info =~ m!$LISTNAME_RE\z!o) { + invalid_list(\%ctx, $1) || redirect_list_index(\%ctx, $cgi); + } elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) { + invalid_list(\%ctx, $1) || get_index(\%ctx, $cgi, 0); + } elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) { + invalid_list(\%ctx, $1) || get_atom(\%ctx, $cgi, 0); + + # single-message pages + } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) { + invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx, $cgi); + } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) { + invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx, $cgi); + + # full-message page + } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) { + invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx, $cgi); + + # convenience redirects, order matters + } elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\z!o) { + invalid_list_mid(\%ctx, $1, $2) || redirect_mid(\%ctx, $cgi); + + } else { + r404(); + } +} + +# for CoW-friendliness, MOOOOO! +sub preload { + require PublicInbox::Feed; + require PublicInbox::View; + require Mail::Thread; + require Email::MIME; + require Digest::SHA; + require POSIX; + require XML::Atom::SimpleFeed; +} + +# private functions below + +sub r404 { r(404, 'Not Found') } + +# simple response for errors +sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] } + +# returns undef if valid, array ref response if invalid +sub invalid_list { + my ($ctx, $listname) = @_; + my $git_dir = $pi_config->get($listname, "mainrepo"); + if (defined $git_dir) { + $ctx->{git_dir} = $git_dir; + $ctx->{listname} = $listname; + return; + } + r404(); +} + +# returns undef if valid, array ref response if invalid +sub invalid_list_mid { + my ($ctx, $listname, $mid) = @_; + my $ret = invalid_list($ctx, $listname, $mid); + $ctx->{mid} = uri_unescape($mid) unless $ret; + $ret; +} + +# /$LISTNAME/atom.xml -> Atom feed, includes replies +sub get_atom { + my ($ctx, $cgi, $top) = @_; + require PublicInbox::Feed; + [ 200, [ 'Content-Type' => 'application/xml' ], + [ PublicInbox::Feed->generate({ + git_dir => $ctx->{git_dir}, + listname => $ctx->{listname}, + pi_config => $pi_config, + cgi => $cgi, + top => $top, + }) ] + ]; +} + +# /$LISTNAME/?r=$GIT_COMMIT -> HTML only +sub get_index { + my ($ctx, $cgi, $top) = @_; + require PublicInbox::Feed; + [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ], + [ PublicInbox::Feed->generate_html_index({ + git_dir => $ctx->{git_dir}, + listname => $ctx->{listname}, + pi_config => $pi_config, + cgi => $cgi, + top => $top, + }) ] + ]; +} + +# just returns a string ref for the blob in the current ctx +sub mid2blob { + my ($ctx) = @_; + require Digest::SHA; + my $hex = Digest::SHA::sha1_hex($ctx->{mid}); + $hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/i or + die "BUG: not a SHA-1 hex: $hex"; + + my @cmd = ('git', "--git-dir=$ctx->{git_dir}", + qw(cat-file blob), "HEAD:$1/$2"); + my $cmd = join(' ', @cmd); + my $pid = open my $fh, '-|'; + defined $pid or die "fork failed: $!\n"; + if ($pid == 0) { + open STDERR, '>', '/dev/null'; # ignore errors + exec @cmd or die "exec failed: $!\n"; + } else { + my $blob = eval { local $/; <$fh> }; + close $fh; + $? == 0 ? \$blob : undef; + } +} + +# /$LISTNAME/m/$MESSAGE_ID.txt -> raw original +sub get_mid_txt { + my ($ctx, $cgi) = @_; + my $x = mid2blob($ctx); + $x ? [ 200, [ 'Content-Type' => 'text/plain' ], [ $$x ] ] : r404(); +} + +# /$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes) +sub get_mid_html { + my ($ctx, $cgi) = @_; + my $x = mid2blob($ctx); + return r404() unless $x; + + require PublicInbox::View; + my $mid_href = PublicInbox::Hval::ascii_html( + uri_escape_utf8($ctx->{mid})); + my $pfx = "../f/$mid_href.html"; + require Email::MIME; + [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ], + [ PublicInbox::View->as_html(Email::MIME->new($$x), $pfx) ] ]; +} + +# /$LISTNAME/f/$MESSAGE_ID.html -> HTML content (fullquotes) +sub get_full_html { + my ($ctx, $cgi) = @_; + my $x = mid2blob($ctx); + return r404() unless $x; + require PublicInbox::View; + require Email::MIME; + [ 200, [ 'Content-Type' => 'text/html' ], + [ PublicInbox::View->as_html(Email::MIME->new($$x))] ]; +} + +sub self_url { + my ($cgi) = @_; + ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string; +} + +sub redirect_list_index { + my ($ctx, $cgi) = @_; + do_redirect(self_url($cgi) . "/"); +} + +sub redirect_mid { + my ($ctx, $cgi) = @_; + my $url = self_url($cgi); + $url =~ s!/f/!/m/!; + do_redirect($url . '.html'); +} + +sub do_redirect { + my ($url) = @_; + [ 301, + [ Location => $url, 'Content-Type' => 'text/plain' ], + [ "Redirecting to $url\n" ] + ] +} + +1; diff --git a/public-inbox.cgi b/public-inbox.cgi index 1d43b86f..57935c50 100755 --- a/public-inbox.cgi +++ b/public-inbox.cgi @@ -1,246 +1,28 @@ #!/usr/bin/perl -w # Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) -# -# We focus on the lowest common denominators here: -# - targeted at text-only console browsers (lynx, w3m, etc..) -# - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs -# - No JavaScript, graphics or icons allowed. -# - Must not rely on static content -# - UTF-8 is only for user-content, 7-bit US-ASCII for us - -use 5.008; use strict; use warnings; -use PublicInbox::Config; -use URI::Escape qw(uri_escape_utf8 uri_unescape); -our $LISTNAME_RE = qr!\A/([\w\.\-]+)!; -our $NO_SCRIPT_NAME; # for prettier redirects with mod_perl2 -our $pi_config; +require PublicInbox::WWW; +use CGI qw/-nosticky/; +our $NO_SCRIPT_NAME; BEGIN { - $pi_config = PublicInbox::Config->new; - # TODO: detect and reload config as needed $NO_SCRIPT_NAME = 1 if $ENV{NO_SCRIPT_NAME}; - if ($ENV{MOD_PERL}) { - require CGI; - no warnings; - $CGI::NOSTICKY = 1; - CGI->compile; - } -} - -if ($ENV{PI_PLACKUP}) { - psgi_app(); -} else { - # some servers (Ruby webrick) include scheme://host[:port] here, - # which confuses CGI.pm when generating self_url. - # RFC 3875 does not mention REQUEST_URI at all, - # so nuke it since CGI.pm functions without it. - require CGI; - delete $ENV{REQUEST_URI}; - $ENV{SCRIPT_NAME} = '' if $NO_SCRIPT_NAME; - my $req = CGI->new; - my $ret = main($req, $req->request_method); - binmode STDOUT; - if (@ARGV && $ARGV[0] eq 'static') { - print $ret->[2]->[0]; - } else { # CGI - cgi_print($ret); - } -} - -# private functions below - -sub main { - my ($cgi, $method) = @_; - my %ctx; - if ($method !~ /\AGET|HEAD\z/) { - return r(405, 'Method Not Allowed'); - } - my $path_info = $cgi->path_info; - - # top-level indices and feeds - if ($path_info eq '/') { - r404(); - } elsif ($path_info =~ m!$LISTNAME_RE\z!o) { - invalid_list(\%ctx, $1) || redirect_list_index(\%ctx, $cgi); - } elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) { - invalid_list(\%ctx, $1) || get_index(\%ctx, $cgi, 0); - } elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) { - invalid_list(\%ctx, $1) || get_atom(\%ctx, $cgi, 0); - - # single-message pages - } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx, $cgi); - } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx, $cgi); - - # full-message page - } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx, $cgi); - - # convenience redirects, order matters - } elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\z!o) { - invalid_list_mid(\%ctx, $1, $2) || redirect_mid(\%ctx, $cgi); - - } else { - r404(); - } -} - -sub r404 { r(404, 'Not Found') } - -# simple response for errors -sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] } - -# returns undef if valid, array ref response if invalid -sub invalid_list { - my ($ctx, $listname) = @_; - my $git_dir = $pi_config->get($listname, "mainrepo"); - if (defined $git_dir) { - $ctx->{git_dir} = $git_dir; - $ctx->{listname} = $listname; - return; - } - r404(); -} - -# returns undef if valid, array ref response if invalid -sub invalid_list_mid { - my ($ctx, $listname, $mid) = @_; - my $ret = invalid_list($ctx, $listname, $mid); - $ctx->{mid} = uri_unescape($mid) unless $ret; - $ret; -} - -# /$LISTNAME/atom.xml -> Atom feed, includes replies -sub get_atom { - my ($ctx, $cgi, $top) = @_; - require PublicInbox::Feed; - [ 200, [ 'Content-Type' => 'application/xml' ], - [ PublicInbox::Feed->generate({ - git_dir => $ctx->{git_dir}, - listname => $ctx->{listname}, - pi_config => $pi_config, - cgi => $cgi, - top => $top, - }) ] - ]; -} - -# /$LISTNAME/?r=$GIT_COMMIT -> HTML only -sub get_index { - my ($ctx, $cgi, $top) = @_; - require PublicInbox::Feed; - [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ], - [ PublicInbox::Feed->generate_html_index({ - git_dir => $ctx->{git_dir}, - listname => $ctx->{listname}, - pi_config => $pi_config, - cgi => $cgi, - top => $top, - }) ] - ]; -} - -# just returns a string ref for the blob in the current ctx -sub mid2blob { - my ($ctx) = @_; - require Digest::SHA; - my $hex = Digest::SHA::sha1_hex($ctx->{mid}); - $hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/i or - die "BUG: not a SHA-1 hex: $hex"; - - my @cmd = ('git', "--git-dir=$ctx->{git_dir}", - qw(cat-file blob), "HEAD:$1/$2"); - my $cmd = join(' ', @cmd); - my $pid = open my $fh, '-|'; - defined $pid or die "fork failed: $!\n"; - if ($pid == 0) { - open STDERR, '>', '/dev/null'; # ignore errors - exec @cmd or die "exec failed: $!\n"; - } else { - my $blob = eval { local $/; <$fh> }; - close $fh; - $? == 0 ? \$blob : undef; - } -} - -# /$LISTNAME/m/$MESSAGE_ID.txt -> raw original -sub get_mid_txt { - my ($ctx, $cgi) = @_; - my $x = mid2blob($ctx); - $x ? [ 200, [ 'Content-Type' => 'text/plain' ], [ $$x ] ] : r404(); -} - -# /$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes) -sub get_mid_html { - my ($ctx, $cgi) = @_; - my $x = mid2blob($ctx); - return r404() unless $x; - - require PublicInbox::View; - my $mid_href = PublicInbox::Hval::ascii_html( - uri_escape_utf8($ctx->{mid})); - my $pfx = "../f/$mid_href.html"; - require Email::MIME; - [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ], - [ PublicInbox::View->as_html(Email::MIME->new($$x), $pfx) ] ]; -} - -# /$LISTNAME/f/$MESSAGE_ID.html -> HTML content (fullquotes) -sub get_full_html { - my ($ctx, $cgi) = @_; - my $x = mid2blob($ctx); - return r404() unless $x; - require PublicInbox::View; - require Email::MIME; - [ 200, [ 'Content-Type' => 'text/html' ], - [ PublicInbox::View->as_html(Email::MIME->new($$x))] ]; -} - -sub self_url { - my ($cgi) = @_; - ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string; -} - -sub redirect_list_index { - my ($ctx, $cgi) = @_; - do_redirect(self_url($cgi) . "/"); -} - -sub redirect_mid { - my ($ctx, $cgi) = @_; - my $url = self_url($cgi); - $url =~ s!/f/!/m/!; - do_redirect($url . '.html'); -} - -sub do_redirect { - my ($url) = @_; - [ 301, - [ Location => $url, 'Content-Type' => 'text/plain' ], - [ "Redirecting to $url\n" ] - ] -} - -sub psgi_app { - # preload so we are CoW friendly - require PublicInbox::Feed; - require PublicInbox::View; - require Mail::Thread; - require Digest::SHA; - require POSIX; - require XML::Atom::SimpleFeed; - require Plack::Request; - sub { - my $req = Plack::Request->new(@_); - main($req, $req->method); - }; -} - -sub cgi_print { - my ($ret) = @_; + CGI->compile if $ENV{MOD_PERL}; +} + +# some servers (Ruby webrick) include scheme://host[:port] here, +# which confuses CGI.pm when generating self_url. +# RFC 3875 does not mention REQUEST_URI at all, +# so nuke it since CGI.pm functions without it. +delete $ENV{REQUEST_URI}; +$ENV{SCRIPT_NAME} = '' if $NO_SCRIPT_NAME; +my $req = CGI->new; +my $ret = PublicInbox::WWW::run($req, $req->request_method); +binmode STDOUT; +if (@ARGV && $ARGV[0] eq 'static') { + print $ret->[2]->[0]; # only show the body +} else { # CGI my ($status, $headers, $body) = @$ret; my %codes = ( 200 => 'OK', diff --git a/t/plack.t b/t/plack.t new file mode 100644 index 00000000..3bc4433f --- /dev/null +++ b/t/plack.t @@ -0,0 +1,112 @@ +# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors +# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +use strict; +use warnings; +use Test::More; +use Email::MIME; +use File::Temp qw/tempdir/; +use Cwd; +use IPC::Run qw/run/; +my $psgi = "examples/public-inbox.psgi"; +my $mda = "blib/script/public-inbox-mda"; +my $tmpdir = tempdir(CLEANUP => 1); +my $home = "$tmpdir/pi-home"; +my $pi_home = "$home/.public-inbox"; +my $pi_config = "$pi_home/config"; +my $maindir = "$tmpdir/main.git"; +my $main_bin = getcwd()."/t/main-bin"; +my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock +my $addr = 'test-public@example.com'; +my $cfgpfx = "publicinbox.test"; +my $failbox = "$home/fail.mbox"; +local $ENV{PI_EMERGENCY} = $failbox; + +our $have_plack; +eval { + require Plack::Request; + eval 'use Plack::Test; use HTTP::Request::Common'; + $have_plack = 1; +}; +SKIP: { + skip 'Plack not installed', 1 unless $have_plack; + ok(-f $psgi, "psgi example file found"); + ok(-x "$main_bin/spamc", + "spamc ham mock found (run in top of source tree"); + ok(-x $mda, "$mda is executable"); + is(1, mkdir($home, 0755), "setup ~/ for testing"); + is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox"); + is(0, system(qw(git init -q --bare), $maindir), "git init (main)"); + open my $fh, '>', "$maindir/description" or die "open: $!\n"; + print $fh "test for public-inbox\n"; + close $fh or die "close: $!\n"; + my %cfg = ( + "$cfgpfx.address" => $addr, + "$cfgpfx.mainrepo" => $maindir, + ); + while (my ($k,$v) = each %cfg) { + is(0, system(qw(git config --file), $pi_config, $k, $v), + "setup $k"); + } + + local $ENV{HOME} = $home; + local $ENV{ORIGINAL_RECIPIENT} = $addr; + + # ensure successful message delivery + { + my $simple = Email::Simple->new(<<EOF); +From: Me <me\@example.com> +To: You <you\@example.com> +Cc: $addr +Message-Id: <blah\@example.com> +Subject: hihi +Date: Thu, 01 Jan 1970 00:00:00 +0000 + +zzzzzz +EOF + my $in = $simple->as_string; + run_with_env({PATH => $main_path}, [$mda], \$in); + local $ENV{GIT_DIR} = $maindir; + my $rev = `git rev-list HEAD`; + like($rev, qr/\A[a-f0-9]{40}/, "good revision committed"); + } + my $app = require $psgi; + + # redirect with trailing / + test_psgi($app, sub { + my ($cb) = @_; + my $from = 'http://example.com/test'; + my $to = "$from/"; + my $res = $cb->(GET($from)); + is(301, $res->code, 'is permanent redirect'); + is($to, $res->header('Location'), 'redirect location matches'); + }); + + test_psgi($app, sub { + my ($cb) = @_; + my $atomurl = 'http://example.com/test/atom.xml'; + my $res = $cb->(GET('http://example.com/test/')); + is(200, $res->code, 'success response received'); + like($res->content, qr!href="\Q$atomurl\E"!, + 'atom URL generated'); + like($res->content, qr!href="m/blah%40example\.com\.html"!, + 'index generated'); + }); + + test_psgi($app, sub { + my ($cb) = @_; + my $pfx = 'http://example.com/test'; + my $res = $cb->(GET($pfx . '/atom.xml')); + is(200, $res->code, 'success response received for atom'); + like($res->content, + qr!link href="\Q$pfx\E/m/blah%40example\.com\.html"!, + 'atom feed generated correct URL'); + }); +} + +done_testing(); + +sub run_with_env { + my ($env, @args) = @_; + my $init = sub { foreach my $k (keys %$env) { $ENV{$k} = $env->{$k} } }; + run(@args, init => $init); +} |