about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2016-12-26 05:25:36 +0000
committerEric Wong <e@80x24.org>2016-12-26 05:25:36 +0000
commite36899b149ecb7cc56f88a6078b18b211ac3c793 (patch)
treeda8a6e07234a815b3782f1a659f6dee1122959d1
parentb388dfdd96804f898fbf72baf2a32e0c9f0fb3f1 (diff)
parent427245acacaf04a882d5524e662075909b96905b (diff)
downloadpublic-inbox-e36899b149ecb7cc56f88a6078b18b211ac3c793.tar.gz
* origin/master: (25 commits)
  evcleanup: ensure deferred close from timers are handled ASAP
  httpd/async: improve variable naming
  githttpbackend: minor cleanups to improve readability
  githttpbackend: simplify compatibility code
  githttpbackend: minor readability improvement
  http: fix clobbering of $null_io
  linkify: modify argument in place
  view: do not modify array during iteration
  view: stop chomping off whitespace at ends of messages
  view: remove unused parameter
  search: lookup_mail handles modified DBs
  doc: various comments on async handling
  searchthread: simplify API and remove needless OO
  searchthread: update comment about loop prevention
  searchmsg: remove ensure_metadata
  tests: add thread-all testing for benchmarking
  searchmsg: do not memoize {date} field
  searchmsg: remove locale-dependency for ->date
  t/config.t: fix feedmax default
  wwwtext: link to RFC4685 (Atom Threading)
  ...
-rw-r--r--Documentation/public-inbox-config.pod8
-rw-r--r--MANIFEST1
-rw-r--r--TODO3
-rw-r--r--examples/public-inbox.psgi1
-rw-r--r--lib/PublicInbox/Config.pm3
-rw-r--r--lib/PublicInbox/EvCleanup.pm14
-rw-r--r--lib/PublicInbox/Feed.pm5
-rw-r--r--lib/PublicInbox/GitHTTPBackend.pm43
-rw-r--r--lib/PublicInbox/HTTP.pm4
-rw-r--r--lib/PublicInbox/HTTPD.pm2
-rw-r--r--lib/PublicInbox/HTTPD/Async.pm14
-rw-r--r--lib/PublicInbox/Inbox.pm11
-rw-r--r--lib/PublicInbox/Linkify.pm17
-rw-r--r--lib/PublicInbox/Listener.pm6
-rw-r--r--lib/PublicInbox/Search.pm12
-rw-r--r--lib/PublicInbox/SearchMsg.pm52
-rw-r--r--lib/PublicInbox/SearchThread.pm39
-rw-r--r--lib/PublicInbox/SearchView.pm25
-rw-r--r--lib/PublicInbox/View.pm42
-rw-r--r--lib/PublicInbox/WwwAtomStream.pm40
-rw-r--r--lib/PublicInbox/WwwText.pm7
-rw-r--r--t/config.t2
-rw-r--r--t/feed.t13
-rw-r--r--t/search.t2
-rw-r--r--t/thread-all.t38
-rw-r--r--t/thread-cycle.t9
26 files changed, 227 insertions, 186 deletions
diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod
index 00376457..cfd6c93d 100644
--- a/Documentation/public-inbox-config.pod
+++ b/Documentation/public-inbox-config.pod
@@ -120,6 +120,14 @@ addresses or mirrors.
 
 Default: none
 
+=item publicinbox.<name>.feedmax
+
+The size of an Atom feed for the inbox.  If specified more than
+once, only the last value is used.  Invalid values (<= 0) will
+be treated as the default value.
+
+Default: 25
+
 =back
 
 =head1 ENVIRONMENT
diff --git a/MANIFEST b/MANIFEST
index 8e0681a2..59d44bcf 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -186,6 +186,7 @@ t/repobrowse_git_tree.t
 t/search.t
 t/spamcheck_spamc.t
 t/spawn.t
+t/thread-all.t
 t/thread-cycle.t
 t/utf8.mbox
 t/view.t
diff --git a/TODO b/TODO
index d2efcbb6..b85887ad 100644
--- a/TODO
+++ b/TODO
@@ -34,8 +34,6 @@ all need to be considered for everything we introduce)
   the links should point to an anchor tag within the same page,
   instead; giving the user options.
 
-* implement RFC 4685 (Atom message threading)
-
 * configurable constants (index limits, search results)
 
 * handle messages with multiple Message-IDs
@@ -47,6 +45,7 @@ all need to be considered for everything we introduce)
 
 * portability to FreeBSD (and other Free Software *BSDs)
   ugh... https://rt.cpan.org/Ticket/Display.html?id=116615
+  (IO::KQueue is broken with Danga::Socket)
 
 * improve documentation
 
diff --git a/examples/public-inbox.psgi b/examples/public-inbox.psgi
index e97f917f..98cde92d 100644
--- a/examples/public-inbox.psgi
+++ b/examples/public-inbox.psgi
@@ -14,6 +14,7 @@ my $www = PublicInbox::WWW->new;
 
 # share the public-inbox code itself:
 my $src = $ENV{SRC_GIT_DIR}; # '/path/to/public-inbox.git'
+$src = PublicInbox::Git->new($src) if defined $src;
 
 builder {
         eval {
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 8d66cf8c..6e31df72 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -145,7 +145,8 @@ sub _fill {
         my $rv = {};
 
         foreach my $k (qw(mainrepo address filter url newsgroup
-                        infourl watch watchheader httpbackendmax)) {
+                        infourl watch watchheader httpbackendmax
+                        feedmax)) {
                 my $v = $self->{"$pfx.$k"};
                 $rv->{$k} = $v if defined $v;
         }
diff --git a/lib/PublicInbox/EvCleanup.pm b/lib/PublicInbox/EvCleanup.pm
index 2b77c617..b9fe843b 100644
--- a/lib/PublicInbox/EvCleanup.pm
+++ b/lib/PublicInbox/EvCleanup.pm
@@ -30,9 +30,19 @@ sub _run_all ($) {
         $_->() foreach @$run;
 }
 
+# ensure Danga::Socket::ToClose fires after timers fire
+sub _asap_close () { $asapq->[1] ||= _asap_timer() }
+
 sub _run_asap () { _run_all($asapq) }
-sub _run_next () { _run_all($nextq) }
-sub _run_later () { _run_all($laterq) }
+sub _run_next () {
+        _run_all($nextq);
+        _asap_close();
+}
+
+sub _run_later () {
+        _run_all($laterq);
+        _asap_close();
+}
 
 # Called by Danga::Socket
 sub event_write {
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 31d82adb..2a33fd29 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -8,9 +8,6 @@ use warnings;
 use Email::MIME;
 use PublicInbox::View;
 use PublicInbox::WwwAtomStream;
-use constant {
-        MAX_PER_PAGE => 25, # this needs to be tunable
-};
 
 # main function
 sub generate {
@@ -114,7 +111,7 @@ sub new_html_footer {
 
 sub each_recent_blob {
         my ($ctx, $cb) = @_;
-        my $max = $ctx->{max} || MAX_PER_PAGE;
+        my $max = $ctx->{-inbox}->{feedmax};
         my $hex = '[a-f0-9]';
         my $addmsg = qr!^:000000 100644 \S+ \S+ A\t(${hex}{2}/${hex}{38})$!;
         my $delmsg = qr!^:100644 000000 \S+ \S+ D\t(${hex}{2}/${hex}{38})$!;
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 0275a2a0..c8f4706c 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -46,6 +46,9 @@ sub r ($;$) {
 sub serve {
         my ($env, $git, $path) = @_;
 
+        # XXX compatibility... ugh, can we stop supporting this?
+        $git = PublicInbox::Git->new($git) unless ref($git);
+
         # Documentation/technical/http-protocol.txt in git.git
         # requires one and exactly one query parameter:
         if ($env->{QUERY_STRING} =~ /\Aservice=git-\w+-pack\z/ ||
@@ -98,7 +101,7 @@ sub serve_dumb {
                 return r(404);
         }
 
-        my $f = (ref $git ? $git->{git_dir} : $git) . '/' . $path;
+        my $f = $git->{git_dir} . '/' . $path;
         return r(404) unless -f $f && -r _; # just in case it's a FIFO :P
         my $size = -s _;
 
@@ -196,21 +199,15 @@ sub serve_smart {
                 my $val = $env->{$name};
                 $env{$name} = $val if defined $val;
         }
-        my ($git_dir, $limiter);
-        if (ref $git) {
-                $limiter = $git->{-httpbackend_limiter} || $default_limiter;
-                $git_dir = $git->{git_dir};
-        } else {
-                $limiter = $default_limiter;
-                $git_dir = $git;
-        }
+        my $limiter = $git->{-httpbackend_limiter} || $default_limiter;
+        my $git_dir = $git->{git_dir};
         $env{GIT_HTTP_EXPORT_ALL} = '1';
         $env{PATH_TRANSLATED} = "$git_dir/$path";
-        my %rdr = ( 0 => fileno($in) );
-        my $x = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, \%rdr);
+        my $rdr = { 0 => fileno($in) };
+        my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr);
         my ($fh, $rpipe);
         my $end = sub {
-                if (my $err = $x->finish) {
+                if (my $err = $qsp->finish) {
                         err($env, "git http-backend ($git_dir): $err");
                 }
                 $fh->close if $fh; # async-only
@@ -227,8 +224,7 @@ sub serve_smart {
                 $r->[0] == 403 ? serve_dumb($env, $git, $path) : $r;
         };
         my $res;
-        my $async = $env->{'pi-httpd.async'};
-        my $io = $env->{'psgix.io'};
+        my $async = $env->{'pi-httpd.async'}; # XXX unstable API
         my $cb = sub {
                 my $r = $rd_hdr->() or return;
                 $rd_hdr = undef;
@@ -239,17 +235,16 @@ sub serve_smart {
                                 $rpipe->close;
                                 $end->();
                         }
-                        return $res->($r);
-                }
-                if ($async) {
+                        $res->($r);
+                } elsif ($async) {
                         $fh = $res->($r);
-                        return $async->async_pass($io, $fh, \$buf);
+                        $async->async_pass($env->{'psgix.io'}, $fh, \$buf);
+                } else { # for synchronous PSGI servers
+                        require PublicInbox::GetlineBody;
+                        $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end,
+                                                                $buf);
+                        $res->($r);
                 }
-
-                # for synchronous PSGI servers
-                require PublicInbox::GetlineBody;
-                $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end, $buf);
-                $res->($r);
         };
         sub {
                 ($res) = @_;
@@ -258,7 +253,7 @@ sub serve_smart {
                 # holding the input here is a waste of FDs and memory
                 $env->{'psgi.input'} = undef;
 
-                $x->start($limiter, sub { # may run later, much later...
+                $qsp->start($limiter, sub { # may run later, much later...
                         ($rpipe) = @_;
                         $in = undef;
                         if ($async) {
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index cac14be3..c4b74b45 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -328,7 +328,7 @@ sub more ($$) {
 
 sub input_prepare {
         my ($self, $env) = @_;
-        my $input = $null_io;
+        my $input;
         my $len = $env->{CONTENT_LENGTH};
         if ($len) {
                 if ($len > $MAX_REQUEST_BUFFER) {
@@ -339,6 +339,8 @@ sub input_prepare {
         } elsif (env_chunked($env)) {
                 $len = CHUNK_START;
                 open($input, '+>', undef);
+        } else {
+                $input = $null_io;
         }
 
         # TODO: expire idle clients on ENFILE / EMFILE
diff --git a/lib/PublicInbox/HTTPD.pm b/lib/PublicInbox/HTTPD.pm
index 433d6da7..fb476624 100644
--- a/lib/PublicInbox/HTTPD.pm
+++ b/lib/PublicInbox/HTTPD.pm
@@ -29,6 +29,8 @@ sub new {
                 'psgi.multiprocess' => Plack::Util::TRUE,
                 'psgix.harakiri'=> Plack::Util::FALSE,
                 'psgix.input.buffered' => Plack::Util::TRUE,
+
+                # XXX unstable API!
                 'pi-httpd.async' => do {
                         no warnings 'once';
                         *pi_httpd_async
diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm
index 68514f5a..79951ca6 100644
--- a/lib/PublicInbox/HTTPD/Async.pm
+++ b/lib/PublicInbox/HTTPD/Async.pm
@@ -30,10 +30,10 @@ sub restart_read_cb ($) {
 }
 
 sub async_pass {
-        my ($self, $io, $fh, $bref) = @_;
-        # In case the client HTTP connection ($io) dies, it
+        my ($self, $http, $fh, $bref) = @_;
+        # In case the client HTTP connection ($http) dies, it
         # will automatically close this ($self) object.
-        $io->{forward} = $self;
+        $http->{forward} = $self;
         $fh->write($$bref);
         my $restart_read = restart_read_cb($self);
         weaken($self);
@@ -41,10 +41,10 @@ sub async_pass {
                 my $r = sysread($self->{sock}, $$bref, 8192);
                 if ($r) {
                         $fh->write($$bref);
-                        return if $io->{closed};
-                        if ($io->{write_buf_size}) {
+                        return if $http->{closed};
+                        if ($http->{write_buf_size}) {
                                 $self->watch_read(0);
-                                $io->write($restart_read); # D::S::write
+                                $http->write($restart_read); # D::S::write
                         }
                         # stay in watch_read, but let other clients
                         # get some work done, too.
@@ -55,7 +55,7 @@ sub async_pass {
 
                 # Done! Error handling will happen in $fh->close
                 # called by the {cleanup} handler
-                $io->{forward} = undef;
+                $http->{forward} = undef;
                 $self->close;
         }
 }
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 8c639082..5503980f 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -29,11 +29,22 @@ sub _weaken_later ($) {
         $WEAKEN->{"$self"} = $self;
 }
 
+sub _set_uint ($$$) {
+        my ($opts, $field, $default) = @_;
+        my $val = $opts->{$field};
+        if (defined $val) {
+                $val = $val->[-1] if ref($val) eq 'ARRAY';
+                $val = undef if $val !~ /\A\d+\z/;
+        }
+        $opts->{$field} = $val || $default;
+}
+
 sub new {
         my ($class, $opts) = @_;
         my $v = $opts->{address} ||= 'public-inbox@example.com';
         my $p = $opts->{-primary_address} = ref($v) eq 'ARRAY' ? $v->[0] : $v;
         $opts->{domain} = ($p =~ /\@(\S+)\z/) ? $1 : 'localhost';
+        _set_uint($opts, 'feedmax', 25);
         weaken($opts->{-pi_config});
         bless $opts, $class;
 }
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index acd2a47e..8e1728c7 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -22,11 +22,10 @@ my $LINK_RE = qr{(\()?\b((?:ftps?|https?|nntps?|gopher)://
                  (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
                  )}xi;
 
-sub new { bless {}, shift }
+sub new { bless {}, $_[0] }
 
 sub linkify_1 {
-        my ($self, $s) = @_;
-        $s =~ s!$LINK_RE!
+        $_[1] =~ s!$LINK_RE!
                 my $beg = $1 || '';
                 my $url = $2;
                 my $end = '';
@@ -50,19 +49,17 @@ sub linkify_1 {
 
                 # only escape ampersands, others do not match LINK_RE
                 $url =~ s/&/&#38;/g;
-                $self->{$key} = $url;
+                $_[0]->{$key} = $url;
                 $beg . 'PI-LINK-'. $key . $end;
         !ge;
-        $s;
+        $_[1];
 }
 
 sub linkify_2 {
-        my ($self, $s) = @_;
-
         # Added "PI-LINK-" prefix to avoid false-positives on git commits
-        $s =~ s!\bPI-LINK-([a-f0-9]{40})\b!
+        $_[1] =~ s!\bPI-LINK-([a-f0-9]{40})\b!
                 my $key = $1;
-                my $url = $self->{$key};
+                my $url = $_[0]->{$key};
                 if (defined $url) {
                         "<a\nhref=\"$url\">$url</a>";
                 } else {
@@ -70,7 +67,7 @@ sub linkify_2 {
                         $key;
                 }
         !ge;
-        $s;
+        $_[1];
 }
 
 1;
diff --git a/lib/PublicInbox/Listener.pm b/lib/PublicInbox/Listener.pm
index 5f351a77..a3a2015b 100644
--- a/lib/PublicInbox/Listener.pm
+++ b/lib/PublicInbox/Listener.pm
@@ -13,7 +13,7 @@ require IO::Handle;
 sub new ($$$) {
         my ($class, $s, $cb) = @_;
         setsockopt($s, SOL_SOCKET, SO_KEEPALIVE, 1);
-        setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1);
+        setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1); # ignore errors on non-TCP
         listen($s, 1024);
         IO::Handle::blocking($s, 0);
         my $self = fields::new($class);
@@ -26,8 +26,12 @@ sub new ($$$) {
 sub event_read {
         my ($self) = @_;
         my $sock = $self->{sock};
+
         # no loop here, we want to fairly distribute clients
         # between multiple processes sharing the same socket
+        # XXX our event loop needs better granularity for
+        # a single accept() here to be, umm..., acceptable
+        # on high-traffic sites.
         if (my $addr = accept(my $c, $sock)) {
                 IO::Handle::blocking($c, 0); # no accept4 :<
                 $self->{post_accept}->($c, $addr, $sock);
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 24cb2667..b59430d8 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -108,12 +108,6 @@ my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
 
 sub xpfx { $all_pfx{$_[0]} }
 
-our %PFX2TERM_RMAP;
-my %meta_pfx = (mid => 1, thread => 1, path => 1);
-while (my ($k, $v) = each %all_pfx) {
-        $PFX2TERM_RMAP{$v} = $k if $meta_pfx{$k};
-}
-
 my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
 
 sub xdir {
@@ -295,8 +289,10 @@ sub lookup_message {
 
 sub lookup_mail { # no ghosts!
         my ($self, $mid) = @_;
-        my $smsg = lookup_message($self, $mid) or return;
-        PublicInbox::SearchMsg->load_doc($smsg->{doc});
+        retry_reopen($self, sub {
+                my $smsg = lookup_message($self, $mid) or return;
+                PublicInbox::SearchMsg->load_doc($smsg->{doc});
+        });
 }
 
 sub find_unique_doc_id {
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 5779d1e2..96406c6f 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -7,12 +7,9 @@ package PublicInbox::SearchMsg;
 use strict;
 use warnings;
 use Search::Xapian;
-use POSIX qw//;
 use Date::Parse qw/str2time/;
 use PublicInbox::MID qw/mid_clean/;
 use PublicInbox::Address;
-our $PFX2TERM_RE = undef;
-use POSIX qw(strftime);
 
 sub new {
         my ($class, $mime) = @_;
@@ -72,13 +69,18 @@ sub subject ($) { __hdr($_[0], 'subject') }
 sub to ($) { __hdr($_[0], 'to') }
 sub cc ($) { __hdr($_[0], 'cc') }
 
+# no strftime, that is locale-dependent
+my @DoW = qw(Sun Mon Tue Wed Thu Fri Sat);
+my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
+
 sub date ($) {
         my ($self) = @_;
-        my $date = __hdr($self, 'date');
-        return $date if defined $date;
         my $ts = $self->{ts};
         return unless defined $ts;
-        $self->{date} = strftime('%a, %d %b %Y %T +0000', gmtime($ts));
+        my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($ts);
+        "$DoW[$wday], " . sprintf("%02d $MoY[$mon] %04d %02d:%02d:%02d +0000",
+                                $mday, $year+1900, $hour, $min, $sec);
+
 }
 
 sub from ($) {
@@ -118,29 +120,17 @@ sub references {
         defined $x ? $x : '';
 }
 
-sub ensure_metadata {
-        my ($self) = @_;
+sub _get_term_val ($$$) {
+        my ($self, $pfx, $re) = @_;
         my $doc = $self->{doc};
         my $end = $doc->termlist_end;
-
-        unless (defined $PFX2TERM_RE) {
-                my $or = join('|', keys %PublicInbox::Search::PFX2TERM_RMAP);
-                $PFX2TERM_RE = qr/\A($or)/;
-        }
-
-        while (my ($pfx, $field) = each %PublicInbox::Search::PFX2TERM_RMAP) {
-                # ideally we'd move this out of the loop:
-                my $i = $doc->termlist_begin;
-
-                $i->skip_to($pfx);
-                if ($i != $end) {
-                        my $val = $i->get_termname;
-
-                        if ($val =~ s/$PFX2TERM_RE//o) {
-                                $self->{$field} = $val;
-                        }
-                }
+        my $i = $doc->termlist_begin;
+        $i->skip_to($pfx);
+        if ($i != $end) {
+                my $val = $i->get_termname;
+                $val =~ s/$re// and return $val;
         }
+        undef;
 }
 
 sub mid ($;$) {
@@ -151,8 +141,8 @@ sub mid ($;$) {
         } elsif (my $rv = $self->{mid}) {
                 $rv;
         } else {
-                $self->ensure_metadata; # needed for ghosts
-                $self->{mid} ||= $self->_extract_mid;
+                $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) ||
+                                $self->_extract_mid;
         }
 }
 
@@ -191,16 +181,14 @@ sub thread_id {
         my ($self) = @_;
         my $tid = $self->{thread};
         return $tid if defined $tid;
-        $self->ensure_metadata;
-        $self->{thread};
+        $self->{thread} = _get_term_val($self, 'G', qr/\AG/); # *G*roup
 }
 
 sub path {
         my ($self) = @_;
         my $path = $self->{path};
         return $path if defined $path;
-        $self->ensure_metadata;
-        $self->{path};
+        $self->{path} = _get_term_val($self, 'XPATH', qr/\AXPATH/); # path
 }
 
 1;
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 601a84b0..2cd066db 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -21,32 +21,28 @@ package PublicInbox::SearchThread;
 use strict;
 use warnings;
 
-sub new {
-        return bless {
-                messages => $_[1],
-                id_table => {},
-                rootset  => []
-        }, $_[0];
-}
-
 sub thread {
-        my $self = shift;
-        _add_message($self, $_) foreach @{$self->{messages}};
-        my $id_table = delete $self->{id_table};
-        $self->{rootset} = [ grep {
+        my ($messages, $ordersub) = @_;
+        my $id_table = {};
+        _add_message($id_table, $_) foreach @$messages;
+        my $rootset = [ grep {
                 !delete($_->{parent}) && $_->visible } values %$id_table ];
+        $id_table = undef;
+        $rootset = $ordersub->($rootset);
+        $_->order_children($ordersub) for @$rootset;
+        $rootset;
 }
 
 sub _get_cont_for_id ($$) {
-        my ($self, $mid) = @_;
-        $self->{id_table}{$mid} ||= PublicInbox::SearchThread::Msg->new($mid);
+        my ($id_table, $mid) = @_;
+        $id_table->{$mid} ||= PublicInbox::SearchThread::Msg->new($mid);
 }
 
 sub _add_message ($$) {
-        my ($self, $smsg) = @_;
+        my ($id_table, $smsg) = @_;
 
         # A. if id_table...
-        my $this = _get_cont_for_id($self, $smsg->{mid});
+        my $this = _get_cont_for_id($id_table, $smsg->{mid});
         $this->{smsg} = $smsg;
 
         # B. For each element in the message's References field:
@@ -59,7 +55,7 @@ sub _add_message ($$) {
         my $prev;
         foreach my $ref ($refs =~ m/<([^>]+)>/g) {
                 # Find a Container object for the given Message-ID
-                my $cont = _get_cont_for_id($self, $ref);
+                my $cont = _get_cont_for_id($id_table, $ref);
 
                 # Link the References field's Containers together in
                 # the order implied by the References header
@@ -82,13 +78,6 @@ sub _add_message ($$) {
         $prev->add_child($this) if defined $prev;
 }
 
-sub order {
-        my ($self, $ordersub) = @_;
-        my $rootset = $ordersub->($self->{rootset});
-        $self->{rootset} = $rootset;
-        $_->order_children($ordersub) for @$rootset;
-}
-
 package PublicInbox::SearchThread::Msg;
 use strict;
 use warnings;
@@ -129,7 +118,7 @@ sub add_child {
 
 sub has_descendent {
         my ($self, $child) = @_;
-        my %seen; # loop prevention XXX may not be necessary
+        my %seen; # loop prevention
         while ($child) {
                 return 1 if $self == $child || $seen{$child}++;
                 $child = $child->{parent};
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 50a2c01c..bd634d8d 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -161,6 +161,15 @@ sub search_nav_bot {
         $rv .= '</pre>';
 }
 
+sub sort_relevance {
+        my ($pct) = @_;
+        sub {
+                [ sort { (eval { $pct->{$b->topmost->{id}} } || 0)
+                                <=>
+                        (eval { $pct->{$a->topmost->{id}} } || 0)
+        } @{$_[0]} ] };
+}
+
 sub mset_thread {
         my ($ctx, $mset, $q) = @_;
         my %pct;
@@ -171,18 +180,8 @@ sub mset_thread {
                 $smsg;
         } ($mset->items) ]});
 
-        my $th = PublicInbox::SearchThread->new($msgs);
-        $th->thread;
-        if ($q->{r}) { # order by relevance
-                $th->order(sub {
-                        [ sort { (eval { $pct{$b->topmost->{id}} } || 0)
-                                        <=>
-                                (eval { $pct{$a->topmost->{id}} } || 0)
-                        } @{$_[0]} ];
-                });
-        } else { # order by time (default for threaded view)
-                $th->order(*PublicInbox::View::sort_ts);
-        }
+        my $rootset = PublicInbox::SearchThread::thread($msgs,
+                $q->{r} ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts);
         my $skel = search_nav_bot($mset, $q). "<pre>";
         my $inbox = $ctx->{-inbox};
         $ctx->{-upfx} = '';
@@ -196,7 +195,7 @@ sub mset_thread {
         $ctx->{seen} = {};
         $ctx->{s_nr} = scalar(@$msgs).'+ results';
 
-        PublicInbox::View::walk_thread($th, $ctx,
+        PublicInbox::View::walk_thread($rootset, $ctx,
                 *PublicInbox::View::pre_thread);
 
         my $mime;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index fa47a16a..e4e9d7d2 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -260,8 +260,8 @@ sub _th_index_lite {
 }
 
 sub walk_thread {
-        my ($th, $ctx, $cb) = @_;
-        my @q = map { (0, $_, -1) } @{$th->{rootset}};
+        my ($rootset, $ctx, $cb) = @_;
+        my @q = map { (0, $_, -1) } @$rootset;
         while (@q) {
                 my ($level, $node, $i) = splice(@q, 0, 3);
                 defined $node or next;
@@ -285,10 +285,10 @@ sub thread_index_entry {
 }
 
 sub stream_thread ($$) {
-        my ($th, $ctx) = @_;
+        my ($rootset, $ctx) = @_;
         my $inbox = $ctx->{-inbox};
         my $mime;
-        my @q = map { (0, $_) } @{$th->{rootset}};
+        my @q = map { (0, $_) } @$rootset;
         my $level;
         while (@q) {
                 $level = shift @q;
@@ -350,10 +350,10 @@ sub thread_html {
         $ctx->{mapping} = {};
         $ctx->{s_nr} = "$nr+ messages in thread";
 
-        my $th = thread_results($msgs);
-        walk_thread($th, $ctx, *pre_thread);
+        my $rootset = thread_results($msgs);
+        walk_thread($rootset, $ctx, *pre_thread);
         $skel .= '</pre>';
-        return stream_thread($th, $ctx) unless $ctx->{flat};
+        return stream_thread($rootset, $ctx) unless $ctx->{flat};
 
         # flat display: lazy load the full message from smsg
         my $inbox = $ctx->{-inbox};
@@ -441,7 +441,7 @@ sub attach_link ($$$$;$) {
 
 sub add_text_body {
         my ($upfx, $p) = @_; # from msg_iter: [ Email::MIME, depth, @idx ]
-        my ($part, $depth, @idx) = @$p;
+        my ($part, $depth) = @$p; # attachment @idx is unused
         my $ct = $part->content_type || 'text/plain';
         my $fn = $part->filename;
 
@@ -476,29 +476,26 @@ sub add_text_body {
         }
         my @quot;
         my $l = PublicInbox::Linkify->new;
-        while (defined(my $cur = shift @lines)) {
+        foreach my $cur (@lines) {
                 if ($cur !~ /^>/) {
                         # show the previously buffered quote inline
                         flush_quote(\$s, $l, \@quot) if @quot;
 
                         # regular line, OK
-                        $cur = $l->linkify_1($cur);
-                        $cur = ascii_html($cur);
-                        $s .= $l->linkify_2($cur);
+                        $l->linkify_1($cur);
+                        $s .= $l->linkify_2(ascii_html($cur));
                 } else {
                         push @quot, $cur;
                 }
         }
 
-        my $end = "\n";
-        if (@quot) {
-                $end = '';
+        if (@quot) { # ugh, top posted
                 flush_quote(\$s, $l, \@quot);
+        } elsif ($s =~ /\n\z/s) { # common, last line ends with a newline
+                $s;
+        } else { # some editors don't do newlines...
+                $s .= "\n";
         }
-        $s =~ s/[ \t]+$//sgm; # kill per-line trailing whitespace
-        $s =~ s/\A\n+//s; # kill leading blank lines
-        $s =~ s/\s+\z//s; # kill all trailing spaces
-        $s .= $end;
 }
 
 sub _msg_html_prepare {
@@ -737,7 +734,7 @@ sub indent_for {
 sub load_results {
         my ($srch, $sres) = @_;
         my $msgs = delete $sres->{msgs};
-        $srch->retry_reopen(sub { [ map { $_->ensure_metadata; $_ } @$msgs ] });
+        $srch->retry_reopen(sub { [ map { $_->mid; $_ } @$msgs ] });
 }
 
 sub msg_timestamp {
@@ -749,10 +746,7 @@ sub msg_timestamp {
 sub thread_results {
         my ($msgs) = @_;
         require PublicInbox::SearchThread;
-        my $th = PublicInbox::SearchThread->new($msgs);
-        $th->thread;
-        $th->order(*sort_ts);
-        $th
+        PublicInbox::SearchThread::thread($msgs, *sort_ts);
 }
 
 sub missing_thread {
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 5720384c..a6817b31 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -9,10 +9,10 @@ use warnings;
 # FIXME: locale-independence:
 use POSIX qw(strftime);
 use Date::Parse qw(strptime);
-
+use Digest::SHA qw(sha1_hex);
 use PublicInbox::Address;
 use PublicInbox::Hval qw(ascii_html);
-use PublicInbox::MID qw/mid_clean mid2path mid_escape/;
+use PublicInbox::MID qw/mid_clean mid_escape/;
 
 # called by PSGI server after getline:
 sub close {}
@@ -72,7 +72,8 @@ sub atom_header {
         my $mtime = (stat($ibx->{mainrepo}))[9] || time;
 
         qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
-        qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
+        qq(<feed\nxmlns="http://www.w3.org/2005/Atom"\n) .
+        qq(xmlns:thr="http://purl.org/syndication/thread/1.0">) .
         qq{$title} .
         qq(<link\nrel="alternate"\ntype="text/html") .
                 qq(\nhref="$base_url"/>) .
@@ -81,22 +82,33 @@ sub atom_header {
         feed_updated(gmtime($mtime));
 }
 
+sub mid2uuid ($) {
+        my ($mid) = @_;
+        utf8::encode($mid); # really screwed up In-Reply-To fields exist
+        $mid = sha1_hex($mid);
+        my $h = '[a-f0-9]';
+        my (@uuid5) = ($mid =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
+        'urn:uuid:' . join('-', @uuid5);
+}
+
 # returns undef or string
 sub feed_entry {
         my ($self, $mime) = @_;
         my $ctx = $self->{ctx};
         my $hdr = $mime->header_obj;
         my $mid = mid_clean($hdr->header_raw('Message-ID'));
-
-        my $uuid = mid2path($mid);
-        $uuid =~ tr!/!!d;
-        my $h = '[a-f0-9]';
-        my (@uuid5) = ($uuid =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
-        $uuid = 'urn:uuid:' . join('-', @uuid5);
-
-        $mid = PublicInbox::Hval->new_msgid($mid);
-        my $href = $ctx->{feed_base_url} . $mid->{href}. '/';
-
+        my $irt = PublicInbox::View::in_reply_to($hdr);
+        my $uuid = mid2uuid($mid);
+        my $base = $ctx->{feed_base_url};
+        if (defined $irt) {
+                my $irt_uuid = mid2uuid($irt);
+                $irt = mid_escape($irt);
+                $irt = qq(<thr:in-reply-to\nref="$irt_uuid"\n).
+                        qq(href="$base$irt/"/>);
+        } else {
+                $irt = '';
+        }
+        my $href = $base . mid_escape($mid) . '/';
         my $date = $hdr->header('Date');
         my @t = eval { strptime($date) } if defined $date;
         @t = gmtime(time) unless scalar @t;
@@ -124,7 +136,7 @@ sub feed_entry {
                 PublicInbox::View::multipart_text_as_html($mime, $href) .
                 '</pre>' .
                 qq!</div></content><link\nhref="$href"/>!.
-                "<id>$uuid</id></entry>";
+                "<id>$uuid</id>$irt</entry>";
 }
 
 sub feed_updated {
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index b0f262cd..449cb499 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -1,7 +1,6 @@
 # Copyright (C) 2016 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# serves the /$INBOX/_/* endpoints from :text/* of the git tree
+
 package PublicInbox::WwwText;
 use strict;
 use warnings;
@@ -182,6 +181,10 @@ message threading
         $WIKI_URL/Atom_(standard)
         https://tools.ietf.org/html/rfc4287
 
+      Atom Threading Extensions (RFC4685) is supported:
+
+        https://tools.ietf.org/html/rfc4685
+
     Finally, the gzipped mbox for a thread is available for
     downloading and importing into your favorite mail client:
 
diff --git a/t/config.t b/t/config.t
index 073d1d03..7271351b 100644
--- a/t/config.t
+++ b/t/config.t
@@ -30,6 +30,7 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1);
                 'url' => 'http://example.com/meta',
                 -primary_address => 'meta@public-inbox.org',
                 'name' => 'meta',
+                feedmax => 25,
                 -pi_config => $cfg,
         }, "lookup matches expected output");
 
@@ -45,6 +46,7 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1);
                 'mainrepo' => '/home/pi/test-main.git',
                 'domain' => 'public-inbox.org',
                 'name' => 'test',
+                feedmax => 25,
                 'url' => 'http://example.com/test',
                 -pi_config => $cfg,
         }, "lookup matches expected output for test");
diff --git a/t/feed.t b/t/feed.t
index 19a9ba09..b60273ed 100644
--- a/t/feed.t
+++ b/t/feed.t
@@ -46,6 +46,7 @@ my $ibx = PublicInbox::Inbox->new({
         name => 'testbox',
         mainrepo => $git_dir,
         url => 'http://example.com/test',
+        feedmax => 3,
 });
 my $git = $ibx->git;
 my $im = PublicInbox::Import->new($git, $ibx->{name}, 'test@example');
@@ -101,10 +102,7 @@ EOF
 {
         # check initial feed
         {
-                my $feed = string_feed({
-                        -inbox => $ibx,
-                        max => 3
-                });
+                my $feed = string_feed({ -inbox => $ibx });
                 SKIP: {
                         skip 'XML::Feed missing', 2 unless $have_xml_feed;
                         my $p = XML::Feed->parse(\$feed);
@@ -142,10 +140,7 @@ EOF
 
         # check spam shows up
         {
-                my $spammy_feed = string_feed({
-                        -inbox => $ibx,
-                        max => 3
-                });
+                my $spammy_feed = string_feed({ -inbox => $ibx });
                 SKIP: {
                         skip 'XML::Feed missing', 2 unless $have_xml_feed;
                         my $p = XML::Feed->parse(\$spammy_feed);
@@ -167,7 +162,7 @@ EOF
 
         # spam no longer shows up
         {
-                my $feed = string_feed({ -inbox => $ibx, max => 3 });
+                my $feed = string_feed({ -inbox => $ibx });
                 SKIP: {
                         skip 'XML::Feed missing', 2 unless $have_xml_feed;
                         my $p = XML::Feed->parse(\$feed);
diff --git a/t/search.t b/t/search.t
index eed9c9b6..c16811d8 100644
--- a/t/search.t
+++ b/t/search.t
@@ -109,7 +109,6 @@ sub filter_mids {
         my $found = $ro->lookup_message('<root@s>');
         ok($found, "message found");
         is($root_id, $found->{doc_id}, 'doc_id set correctly');
-        $found->ensure_metadata;
         is($found->mid, 'root@s', 'mid set correctly');
         ok(int($found->thread_id) > 0, 'thread_id is an integer');
 
@@ -290,7 +289,6 @@ sub filter_mids {
                 body => "LOOP!\n"));
         ok($doc_id > 0, "doc_id defined with circular reference");
         my $smsg = $rw->lookup_message('circle@a');
-        $smsg->ensure_metadata;
         is($smsg->references, '', "no references created");
         my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
         is($s, $msg->subject, 'long subject not rewritten');
diff --git a/t/thread-all.t b/t/thread-all.t
new file mode 100644
index 00000000..8ccf4f8c
--- /dev/null
+++ b/t/thread-all.t
@@ -0,0 +1,38 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# real-world testing of search threading
+use strict;
+use warnings;
+use Test::More;
+use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
+my $pi_dir = $ENV{GIANT_PI_DIR};
+plan skip_all => "GIANT_PI_DIR not defined for $0" unless $pi_dir;
+eval { require PublicInbox::Search; };
+plan skip_all => "Xapian missing for $0" if $@;
+my $srch = eval { PublicInbox::Search->new($pi_dir) };
+plan skip_all => "$pi_dir not initialized for $0" if $@;
+
+require PublicInbox::View;
+require PublicInbox::SearchThread;
+
+my $pfx = PublicInbox::Search::xpfx('thread');
+my $opts = { limit => 1000000, asc => 1 };
+my $t0 = clock_gettime(CLOCK_MONOTONIC);
+my $elapsed;
+
+my $sres = $srch->_do_enquire(undef, $opts);
+$elapsed = clock_gettime(CLOCK_MONOTONIC) - $t0;
+diag "enquire: $elapsed";
+
+$t0 = clock_gettime(CLOCK_MONOTONIC);
+my $msgs = PublicInbox::View::load_results($srch, $sres);
+$elapsed = clock_gettime(CLOCK_MONOTONIC) - $t0;
+diag "load_results $elapsed";
+
+$t0 = clock_gettime(CLOCK_MONOTONIC);
+PublicInbox::View::thread_results($msgs);
+$elapsed = clock_gettime(CLOCK_MONOTONIC) - $t0;
+diag "thread_results $elapsed";
+
+done_testing();
diff --git a/t/thread-cycle.t b/t/thread-cycle.t
index b0844490..16ceee71 100644
--- a/t/thread-cycle.t
+++ b/t/thread-cycle.t
@@ -3,7 +3,6 @@
 use strict;
 use warnings;
 use Test::More;
-use_ok('PublicInbox::SearchMsg');
 use_ok('PublicInbox::SearchThread');
 use Email::Simple;
 my $mt = eval {
@@ -73,11 +72,11 @@ SKIP: {
 done_testing();
 
 sub thread_to_s {
-        my $th = PublicInbox::SearchThread->new(shift);
-        $th->thread;
-        $th->order(sub { [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] });
+        my ($msgs) = @_;
+        my $rootset = PublicInbox::SearchThread::thread($msgs, sub {
+                [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] });
         my $st = '';
-        my @q = map { (0, $_) } @{$th->{rootset}};
+        my @q = map { (0, $_) } @$rootset;
         while (@q) {
                 my $level = shift @q;
                 my $node = shift @q or next;