From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS8972 85.25.103.0/24 X-Spam-Status: No, score=-2.1 required=3.0 tests=AWL,BAYES_00, RCVD_IN_MSPIKE_BL,RCVD_IN_MSPIKE_ZBI,RCVD_IN_XBL,SPF_FAIL,SPF_HELO_FAIL, TO_EQ_FM_DOM_SPF_FAIL shortcircuit=no autolearn=no autolearn_force=no version=3.4.0 Received: from 80x24.org (atlantic850.dedicatedpanel.com [85.25.103.69]) by dcvr.yhbt.net (Postfix) with ESMTP id EF3511FD99 for ; Thu, 18 Aug 2016 02:16:06 +0000 (UTC) From: Eric Wong To: spew@80x24.org Subject: [PATCH] wip Date: Thu, 18 Aug 2016 02:16:05 +0000 Message-Id: <20160818021605.20176-1-e@80x24.org> List-Id: --- MANIFEST | 2 + lib/PublicInbox/Search.pm | 30 +++++++- lib/PublicInbox/WWW.pm | 14 ++++ lib/PublicInbox/WwwStream.pm | 4 +- lib/PublicInbox/WwwText.pm | 160 +++++++++++++++++++++++++++++++++++++++++++ t/psgi_text.t | 39 +++++++++++ 6 files changed, 247 insertions(+), 2 deletions(-) create mode 100644 lib/PublicInbox/WwwText.pm create mode 100644 t/psgi_text.t diff --git a/MANIFEST b/MANIFEST index bed6050..306945a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -82,6 +82,7 @@ lib/PublicInbox/WWW.pm lib/PublicInbox/WatchMaildir.pm lib/PublicInbox/WwwAttach.pm lib/PublicInbox/WwwStream.pm +lib/PublicInbox/WwwText.pm sa_config/Makefile sa_config/README sa_config/root/etc/spamassassin/public-inbox.pre @@ -141,6 +142,7 @@ t/plack.t t/precheck.t t/psgi_attach.t t/psgi_mount.t +t/psgi_text.t t/qspawn.t t/search.t t/spamcheck_spamc.t diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 7561ef4..4fff1e4 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -51,6 +51,7 @@ my %bool_pfx_internal = ( thread => 'G', # newsGroup (or similar entity - e.g. a web forum name) ); +# do we still need these? probably not.. my %bool_pfx_external = ( path => 'XPATH', mid => 'Q', # uniQue id (Message-ID) @@ -62,6 +63,26 @@ my %prob_prefix = ( m => 'Q', # 'mid' is exact, 'm' can do partial ); +# not documenting m: and mid:, the using the URLs works w/o Xapian +our $HELP = <{altid}) { for (@$altid) { - # $_ = 'serial:gmane:/path/to/gmane.msgmap.sqlite3' + # $_ = 'serial:gmane:file=/path/to/gmane.msgmap.sqlite3' /\Aserial:(\w+):/ or next; my $pfx = $1; # gmane => XGMANE @@ -321,4 +342,11 @@ sub enquire { $self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb}); } +sub help { + my ($self) = @_; + my $ret = [ $HELP ]; + # TODO: merge altid prefixes + $ret; +} + 1; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 6f6a003..6d6349a 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -96,6 +96,8 @@ sub call { } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) { r301($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/_/text/(.+)\z!o) { + get_text($ctx, $1, $2); # convenience redirects order matters } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) { @@ -238,6 +240,18 @@ sub get_thread { PublicInbox::View::thread_html($ctx); } +# /$INBOX/_/text/$KEY/ +# /$INBOX/_/text/$KEY/raw +# KEY may contain slashes +sub get_text { + my ($ctx, $inbox, $key) = @_; + my $r404 = invalid_inbox($ctx, $inbox); + return $r404 if $r404; + + require PublicInbox::WwwText; + PublicInbox::WwwText::get_text($ctx, $key); +} + sub ctx_get { my ($ctx, $key) = @_; my $val = $ctx->{$key}; diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 9ed25e1..c89e6de 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -31,10 +31,12 @@ sub _html_top ($) { my $desc = ascii_html($obj->description); my $title = $ctx->{-title_html} || $desc; my $upfx = $ctx->{-upfx} || ''; + my $help = $upfx.'_/text/help'; my $atom = $ctx->{-atom} || $upfx.'new.atom'; my $tip = $ctx->{-html_tip} || ''; my $top = "$desc"; - my $links = "Atom feed"; + my $links = "help / ". + "Atom feed"; if ($obj->search) { my $q_val = $ctx->{-q_value_html}; if (defined $q_val && $q_val ne '') { diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm new file mode 100644 index 0000000..51a91d1 --- /dev/null +++ b/lib/PublicInbox/WwwText.pm @@ -0,0 +1,160 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +# +# serves the /$INBOX/_/* endpoints from :text/* of the git tree +package PublicInbox::WwwText; +use strict; +use warnings; +use PublicInbox::Linkify; +use PublicInbox::WwwStream; +use PublicInbox::Hval qw(ascii_html); +our $QP_URL = 'https://xapian.org/docs/queryparser.html'; +our $WIKI_URL = 'https://en.wikipedia.org/wiki'; + +# /$INBOX/_/text/$KEY/ # KEY may contain slashes +sub get_text { + my ($ctx, $key) = @_; + my $code = 200; + + # get the raw text the same way we get mboxrds + my $raw = ($key =~ s!/raw\z!!); + my $have_tslash = ($key =~ s!/\z!!) if !$raw; + + my $txt = ''; + if (!_default_text($ctx, $key, \$txt)) { + $code = 404; + $txt = "404 Not Found ($key)\n"; + } + if ($raw) { + return [ $code, [ 'Content-Type', 'text/plain', + 'Content-Length', bytes::length($txt) ], + [ $txt ] ] + } + + # enforce trailing slash for "wget -r" compatibility + if (!$have_tslash && $code == 200) { + my $url = $ctx->{-inbox}->base_url($ctx->{env}); + $url .= "_/text/$key/"; + + return [ 302, [ 'Content-Type', 'text/plain', + 'Location', $url ], + [ "Redirecting to $url\n" ] ]; + } + + # Follow git commit message conventions, + # first line is the Subject/title + my ($title) = ($txt =~ /\A([^\n]*)/s); + _do_linkify($txt); + $ctx->{-title_html} = ascii_html($title); + + my $nslash = ($key =~ tr!/!/!); + $ctx->{-upfx} = '../../../' . ('../' x $nslash); + + PublicInbox::WwwStream->response($ctx, $code, sub { + my ($nr, undef) = @_; + $nr == 1 ? '
'.$txt.'
' : undef + }); +} + +sub _do_linkify { + my $l = PublicInbox::Linkify->new; + $_[0] = $l->linkify_2(ascii_html($l->linkify_1($_[0]))); +} + +sub _default_text ($$$) { + my ($ctx, $key, $txt) = @_; + return if $key ne 'help'; # TODO more keys? + + my $ibx = $ctx->{-inbox}; + my $base_url = $ibx->base_url($ctx->{env}); + $$txt .= "public-inbox help for $base_url\n"; + $$txt .= <') into the URL. + Forward slash ('/') characters in the Message-IDs + need to be escaped as "%2F" (without quotes). + + Thus, it is possible to retrieve any message by its + Message-ID by going to: + + $base_url/ + + (without the '<' or '>') + + Message-IDs are described at: + + $WIKI_URL/Message-ID + +EOF + + # n.b. we use the Xapian DB for any regeneratable, + # order-of-arrival-independent data. + my $srch = $ibx->search; + if ($srch) { + $$txt .= <help; + foreach my $h (@$help) { + $h =~ s/^/\t/sgm; + $$txt .= $h; + $$txt .= "\n"; + } + + $$txt .= </T/#u + + Loads the thread belonging to the given + in flat chronological order. The "#u" anchor + focuses the browser on the given . + + * $base_url/t/#u + + Loads the thread belonging to the given + in threaded order with nesting. For deep threads, + this requires a wide display or horizontal scrolling. + + Both of these HTML endpoints are suitable for offline reading + using the thread overview at the bottom of each page. + + Users of feed readers may follow a particular thread using: + + * $base_url/t.atom + + $WIKI_URL/Atom_(standard) + + Finally, the gzipped mbox for a thread is available for + downloading and importing into your favorite mail client: + + * $base_url/t.mbox.gz + + $WIKI_URL/Mbox + (we use the mboxrd variant) +EOF + } + 1; +} + +1; diff --git a/t/psgi_text.t b/t/psgi_text.t new file mode 100644 index 0000000..bf565f8 --- /dev/null +++ b/t/psgi_text.t @@ -0,0 +1,39 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use Email::MIME; +use File::Temp qw/tempdir/; +my $tmpdir = tempdir('psgi-text-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $maindir = "$tmpdir/main.git"; +my $addr = 'test-public@example.com'; +my $cfgpfx = "publicinbox.test"; +my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for psgi_text.t" if $@; +} +use_ok $_ foreach @mods; +use PublicInbox::Import; +use PublicInbox::Git; +use PublicInbox::Config; +use PublicInbox::WWW; +use_ok 'PublicInbox::WwwText'; +use Plack::Builder; +my $config = PublicInbox::Config->new({ + "$cfgpfx.address" => $addr, + "$cfgpfx.mainrepo" => $maindir, +}); +is(0, system(qw(git init -q --bare), $maindir), "git init (main)"); +my $www = PublicInbox::WWW->new($config); + +test_psgi(sub { $www->call(@_) }, sub { + my ($cb) = @_; + my $res; + $res = $cb->(GET('/test/_/text/help/')); + like($res->content, qr!public-inbox help.*!, + 'default help'); +}); + +done_testing(); -- EW