From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] wip
Date: Thu, 18 Aug 2016 02:16:05 +0000 [thread overview]
Message-ID: <20160818021605.20176-1-e@80x24.org> (raw)
---
MANIFEST | 2 +
lib/PublicInbox/Search.pm | 30 +++++++-
lib/PublicInbox/WWW.pm | 14 ++++
lib/PublicInbox/WwwStream.pm | 4 +-
lib/PublicInbox/WwwText.pm | 160 +++++++++++++++++++++++++++++++++++++++++++
t/psgi_text.t | 39 +++++++++++
6 files changed, 247 insertions(+), 2 deletions(-)
create mode 100644 lib/PublicInbox/WwwText.pm
create mode 100644 t/psgi_text.t
diff --git a/MANIFEST b/MANIFEST
index bed6050..306945a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -82,6 +82,7 @@ lib/PublicInbox/WWW.pm
lib/PublicInbox/WatchMaildir.pm
lib/PublicInbox/WwwAttach.pm
lib/PublicInbox/WwwStream.pm
+lib/PublicInbox/WwwText.pm
sa_config/Makefile
sa_config/README
sa_config/root/etc/spamassassin/public-inbox.pre
@@ -141,6 +142,7 @@ t/plack.t
t/precheck.t
t/psgi_attach.t
t/psgi_mount.t
+t/psgi_text.t
t/qspawn.t
t/search.t
t/spamcheck_spamc.t
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 7561ef4..4fff1e4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -51,6 +51,7 @@ my %bool_pfx_internal = (
thread => 'G', # newsGroup (or similar entity - e.g. a web forum name)
);
+# do we still need these? probably not..
my %bool_pfx_external = (
path => 'XPATH',
mid => 'Q', # uniQue id (Message-ID)
@@ -62,6 +63,26 @@ my %prob_prefix = (
m => 'Q', # 'mid' is exact, 'm' can do partial
);
+# not documenting m: and mid:, as using the URLs works w/o Xapian
+our $HELP = <<EOF;
+s: match within Subject only e.g. s:"a quick brown fox"
+ This is a probabilistic search with support for stemming
+ and wildcards '*'.
+
+d: date range as YYYYMMDD e.g. d:19931002..20101002
+ Open-ended ranges such as d:19931002.. and d:..20101002
+ are also supported.
+EOF
+# TODO: (from mairix, some of these are maybe)
+# b (body), f (From:), c (Cc:), n (attachment), t (To:)
+# tc (To:+Cc:), bs (body + Subject), tcf (To: +Cc: +From:)
+#
+# Non-mairix:
+# df (filenames from diff)
+# nq (non-quoted body)
+# da (diff a/ removed lines)
+# db (diff b/ added lines)
+
my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
sub xpfx { $all_pfx{$_[0]} }
@@ -191,7 +212,7 @@ sub qp {
# just parse the spec to avoid the extra DB handles for now.
if (my $altid = $self->{altid}) {
for (@$altid) {
- # $_ = 'serial:gmane:/path/to/gmane.msgmap.sqlite3'
+ # $_ = 'serial:gmane:file=/path/to/gmane.msgmap.sqlite3'
/\Aserial:(\w+):/ or next;
my $pfx = $1;
# gmane => XGMANE
@@ -321,4 +342,11 @@ sub enquire {
$self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb});
}
+# returns an arrayref of help text sections for the supported prefixes
+sub help {
+	my ($self) = @_;
+	# TODO: merge altid prefixes
+	return [ $HELP ];
+}
+
1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 6f6a003..6d6349a 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -96,6 +96,8 @@ sub call {
} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) {
r301($ctx, $1, $2);
+ } elsif ($path_info =~ m!$INBOX_RE/_/text/(.+)\z!o) {
+ get_text($ctx, $1, $2);
# convenience redirects order matters
} elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
@@ -238,6 +240,18 @@ sub get_thread {
PublicInbox::View::thread_html($ctx);
}
+# Serves per-inbox text pages:
+#   /$INBOX/_/text/$KEY/ and /$INBOX/_/text/$KEY/raw
+# KEY may contain slashes
+sub get_text {
+	my ($ctx, $inbox, $key) = @_;
+	if (my $err = invalid_inbox($ctx, $inbox)) {
+		return $err; # hand back whatever invalid_inbox gave us
+	}
+	require PublicInbox::WwwText; # only loaded when this endpoint is hit
+	return PublicInbox::WwwText::get_text($ctx, $key);
+}
+
sub ctx_get {
my ($ctx, $key) = @_;
my $val = $ctx->{$key};
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 9ed25e1..c89e6de 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -31,10 +31,12 @@ sub _html_top ($) {
my $desc = ascii_html($obj->description);
my $title = $ctx->{-title_html} || $desc;
my $upfx = $ctx->{-upfx} || '';
+ my $help = $upfx.'_/text/help';
my $atom = $ctx->{-atom} || $upfx.'new.atom';
my $tip = $ctx->{-html_tip} || '';
my $top = "<b>$desc</b>";
- my $links = "<a\nhref=\"$atom\">Atom feed</a>";
+ my $links = "<a\nhref=\"$help\">help</a> / ".
+ "<a\nhref=\"$atom\">Atom feed</a>";
if ($obj->search) {
my $q_val = $ctx->{-q_value_html};
if (defined $q_val && $q_val ne '') {
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
new file mode 100644
index 0000000..51a91d1
--- /dev/null
+++ b/lib/PublicInbox/WwwText.pm
@@ -0,0 +1,160 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# serves the /$INBOX/_/* endpoints from :text/* of the git tree
+package PublicInbox::WwwText;
+use strict;
+use warnings;
+use PublicInbox::Linkify;
+use PublicInbox::WwwStream;
+use PublicInbox::Hval qw(ascii_html);
+our $QP_URL = 'https://xapian.org/docs/queryparser.html';
+our $WIKI_URL = 'https://en.wikipedia.org/wiki';
+
+# /$INBOX/_/text/$KEY/ # KEY may contain slashes
+# Returns a PSGI response: text/plain for ".../raw", HTML otherwise
+sub get_text {
+	my ($ctx, $key) = @_;
+	my $code = 200;
+
+	# get the raw text the same way we get mboxrds
+	my $raw = ($key =~ s!/raw\z!!);
+	# n.b. "my $x = ... if COND" has undefined behavior (perlsyn)
+	my $have_tslash = $raw ? undef : ($key =~ s!/\z!!);
+
+	my $txt = '';
+	if (!_default_text($ctx, $key, \$txt)) {
+		$code = 404;
+		$txt = "404 Not Found ($key)\n";
+	}
+	if ($raw) {
+		require bytes; # nothing else here loads bytes::length
+		return [ $code, [ 'Content-Type', 'text/plain',
+				  'Content-Length', bytes::length($txt) ],
+			[ $txt ] ]
+	}
+
+	# enforce trailing slash for "wget -r" compatibility
+	if (!$have_tslash && $code == 200) {
+		my $url = $ctx->{-inbox}->base_url($ctx->{env});
+		$url .= "_/text/$key/";
+		return [ 302, [ 'Content-Type', 'text/plain',
+				'Location', $url ],
+			[ "Redirecting to $url\n" ] ];
+	}
+
+	# git commit message convention: first line is the Subject/title
+	my ($title) = ($txt =~ /\A([^\n]*)/s);
+	_do_linkify($txt);
+	$ctx->{-title_html} = ascii_html($title);
+
+	# three levels for "_/text/$key/", plus one per slash inside $key
+	$ctx->{-upfx} = '../../../' . ('../' x ($key =~ tr!/!/!));
+
+	PublicInbox::WwwStream->response($ctx, $code, sub {
+		$_[0] == 1 ? '<pre>'.$txt.'</pre>' : undef
+	});
+}
+
+sub _do_linkify { # rewrites $_[0] in-place for the caller
+	my $lnk = PublicInbox::Linkify->new;
+	$_[0] = $lnk->linkify_2(ascii_html($lnk->linkify_1($_[0])));
+}
+
+# Appends built-in text for $key to $$txt; true if $key is known ("help")
+# n.b. no prototype, get_text calls this before it is declared
+sub _default_text {
+	my ($ctx, $key, $txt) = @_;
+	return if $key ne 'help'; # TODO more keys?
+
+	my $ibx = $ctx->{-inbox};
+	my $base_url = $ibx->base_url($ctx->{env});
+	$$txt .= "public-inbox help for $base_url\n";
+	$$txt .= <<EOF;
+
+overview
+--------
+
+ public-inbox uses Message-ID identifiers in URLs.
+ One may look up messages by substituting Message-IDs
+ (without the leading '<' or trailing '>') into the URL.
+ Forward slash ('/') characters in the Message-IDs
+ need to be escaped as "%2F" (without quotes).
+
+ Thus, it is possible to retrieve any message by its
+ Message-ID by going to:
+
+ $base_url<Message-ID>/
+
+ (without the '<' or '>')
+
+ Message-IDs are described at:
+
+ $WIKI_URL/Message-ID
+
+EOF
+
+	# n.b. we use the Xapian DB for any regeneratable,
+	# order-of-arrival-independent data.
+	if (my $srch = $ibx->search) {
+		$$txt .= <<EOF;
+search
+------
+
+ This public-inbox has search functionality provided by Xapian.
+
+ It supports typical AND, OR, NOT, '+', '-' queries present
+ in other search engines.
+
+ Prefixes supported:
+
+EOF
+		# prefix every line of each help section with a tab
+		foreach my $h (@{$srch->help}) {
+			$h =~ s/^/\t/sgm;
+			$$txt .= "$h\n";
+		}
+
+		$$txt .= <<EOF;
+ See $QP_URL for more details.
+
+message threading
+-----------------
+
+ Message threading is enabled for this public-inbox,
+ additional endpoints for message threads are available:
+
+ * $base_url<Message-ID>/T/#u
+
+ Loads the thread belonging to the given <Message-ID>
+ in flat chronological order. The "#u" anchor
+ focuses the browser on the given <Message-ID>.
+
+ * $base_url<Message-ID>/t/#u
+
+ Loads the thread belonging to the given <Message-ID>
+ in threaded order with nesting. For deep threads,
+ this requires a wide display or horizontal scrolling.
+
+ Both of these HTML endpoints are suitable for offline reading
+ using the thread overview at the bottom of each page.
+
+ Users of feed readers may follow a particular thread using:
+
+ * $base_url<Message-ID>/t.atom
+
+ $WIKI_URL/Atom_(standard)
+
+ Finally, the gzipped mbox for a thread is available for
+ downloading and importing into your favorite mail client:
+
+ * $base_url<Message-ID>/t.mbox.gz
+
+ $WIKI_URL/Mbox
+ (we use the mboxrd variant)
+EOF
+	}
+	1;
+}
+
+1;
diff --git a/t/psgi_text.t b/t/psgi_text.t
new file mode 100644
index 0000000..bf565f8
--- /dev/null
+++ b/t/psgi_text.t
@@ -0,0 +1,39 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use File::Temp qw/tempdir/;
+my $tmpdir = tempdir('psgi-text-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $maindir = "$tmpdir/main.git";
+my $addr = 'test-public@example.com';
+my $cfgpfx = "publicinbox.test";
+my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape);
+foreach my $mod (@mods) {
+	eval "require $mod";
+	plan skip_all => "$mod missing for psgi_text.t" if $@;
+}
+use_ok $_ foreach @mods;
+use PublicInbox::Import;
+use PublicInbox::Git;
+use PublicInbox::Config;
+use PublicInbox::WWW;
+use_ok 'PublicInbox::WwwText';
+use Plack::Builder;
+my $config = PublicInbox::Config->new({
+	"$cfgpfx.address" => $addr,
+	"$cfgpfx.mainrepo" => $maindir,
+});
+is(system(qw(git init -q --bare), $maindir), 0, "git init (main)");
+my $www = PublicInbox::WWW->new($config);
+
+test_psgi(sub { $www->call(@_) }, sub {
+	my ($cb) = @_;
+	# no messages are imported, only the built-in help text is requested
+	my $res = $cb->(GET('/test/_/text/help/'));
+	like($res->content, qr!<title>public-inbox help.*</title>!,
+		'default help');
+});
+
+done_testing();
--
EW
next reply other threads:[~2016-08-18 2:16 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-08-18 2:16 Eric Wong [this message]
-- strict thread matches above, loose matches on Subject: below --
2021-10-27 20:16 [PATCH] wip Eric Wong
2021-06-05 19:58 Eric Wong
2021-04-05 7:42 Eric Wong
2021-03-08 7:11 Eric Wong
2021-01-21 4:24 [PATCH] WIP Eric Wong
2021-01-03 22:57 [PATCH] wip Eric Wong
2020-12-27 11:36 [PATCH] WIP Eric Wong
2020-11-15 7:35 [PATCH] wip Eric Wong
2020-04-23 4:27 Eric Wong
2020-04-20 7:14 Eric Wong
2020-01-13 9:24 [PATCH] WIP Eric Wong
2019-05-11 22:55 Eric Wong
2019-01-02 9:21 [PATCH] wip Eric Wong
2018-07-06 21:31 Eric Wong
2018-06-24 11:55 Eric Wong
2018-06-24 8:39 Eric Wong
2017-07-15 1:42 [PATCH] WIP Eric Wong
2017-04-12 20:17 [PATCH] wip Eric Wong
2017-04-05 18:40 Eric Wong
2016-08-23 20:07 Eric Wong
2016-06-26 3:46 Eric Wong
2015-12-22 0:15 Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160818021605.20176-1-e@80x24.org \
--to=e@80x24.org \
--cc=spew@80x24.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).