From 3fc59df0d633a17e0c5e43d633d12e8772c06ec3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 10 Jan 2017 21:40:37 +0000 Subject: introduce PublicInbox::MIME wrapper class This should fix problems with multipart messages where text/plain parts lack a header. cf. git clone --mirror https://github.com/rjbs/Email-MIME.git refs/pull/28/head In the future, we may still introduce as streaming interface to reduce memory usage on large emails. --- MANIFEST | 2 + lib/PublicInbox/Feed.pm | 6 +-- lib/PublicInbox/Filter/Vger.pm | 2 +- lib/PublicInbox/Import.pm | 2 +- lib/PublicInbox/MIME.pm | 102 ++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/MsgIter.pm | 22 +-------- lib/PublicInbox/Search.pm | 2 +- lib/PublicInbox/SearchIdx.pm | 4 +- lib/PublicInbox/SearchView.pm | 6 +-- lib/PublicInbox/View.pm | 8 ++-- lib/PublicInbox/WWW.pm | 6 +-- lib/PublicInbox/WatchMaildir.pm | 6 +-- lib/PublicInbox/WwwAttach.pm | 4 +- script/public-inbox-learn | 4 +- script/public-inbox-mda | 2 +- t/import.t | 6 +-- t/mime.t | 57 ++++++++++++++++++++++ 17 files changed, 191 insertions(+), 50 deletions(-) create mode 100644 lib/PublicInbox/MIME.pm create mode 100644 t/mime.t diff --git a/MANIFEST b/MANIFEST index 8f5e487e..76fd1da6 100644 --- a/MANIFEST +++ b/MANIFEST @@ -65,6 +65,7 @@ lib/PublicInbox/Linkify.pm lib/PublicInbox/Listener.pm lib/PublicInbox/MDA.pm lib/PublicInbox/MID.pm +lib/PublicInbox/MIME.pm lib/PublicInbox/Mbox.pm lib/PublicInbox/MsgIter.pm lib/PublicInbox/Msgmap.pm @@ -143,6 +144,7 @@ t/linkify.t t/main-bin/spamc t/mda.t t/mid.t +t/mime.t t/msg_iter.t t/msgmap.t t/nntp.t diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 2a33fd29..e5d57550 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -5,7 +5,7 @@ package PublicInbox::Feed; use strict; use warnings; -use Email::MIME; +use PublicInbox::MIME; use PublicInbox::View; use PublicInbox::WwwAtomStream; @@ -39,7 +39,7 @@ sub generate_thread_atom { 
PublicInbox::WwwAtomStream->response($ctx, 200, sub { while (my $msg = shift @$msgs) { $msg = $ibx->msg_by_smsg($msg) and - return Email::MIME->new($msg); + return PublicInbox::MIME->new($msg); } }); } @@ -175,7 +175,7 @@ sub each_recent_blob { sub do_cat_mail { my ($ibx, $path) = @_; my $mime = eval { $ibx->msg_by_path($path) } or return; - Email::MIME->new($mime); + PublicInbox::MIME->new($mime); } 1; diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm index 2ffed184..905f28d7 100644 --- a/lib/PublicInbox/Filter/Vger.pm +++ b/lib/PublicInbox/Filter/Vger.pm @@ -25,7 +25,7 @@ sub scrub { # so in multipart (e.g. GPG-signed) messages, the list trailer # becomes invisible to MIME-aware email clients. if ($s =~ s/$l0\n$l1\n$l2\n$l3\n($l4\n)?\z//os) { - $mime = Email::MIME->new(\$s); + $mime = PublicInbox::MIME->new(\$s); } $self->ACCEPT($mime); } diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 1ac112b8..13671a4f 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -119,7 +119,7 @@ sub remove { $n = read($r, my $lf, 1); defined($n) or die "read final byte of cat-blob failed: $!"; die "bad read on final byte: <$lf>" if $lf ne "\n"; - my $cur = Email::MIME->new($buf); + my $cur = PublicInbox::MIME->new($buf); my $cur_s = $cur->header('Subject'); $cur_s = '' unless defined $cur_s; my $cur_m = $mime->header('Subject'); diff --git a/lib/PublicInbox/MIME.pm b/lib/PublicInbox/MIME.pm new file mode 100644 index 00000000..792fffd6 --- /dev/null +++ b/lib/PublicInbox/MIME.pm @@ -0,0 +1,102 @@ +# This library is free software; you can redistribute it and/or modify +# it under the same terms as Perl itself. +# +# The license for this file differs from the rest of public-inbox. 
+# +# It monkey patches the "parts_multipart" subroutine with patches +# from Matthew Horsfall at: +# +# git clone --mirror https://github.com/rjbs/Email-MIME.git refs/pull/28/head +# +# commit fe0eb870ab732507aa39a1070a2fd9435c7e4877 +# ("Make sure we don't modify the body of a message when injecting a header.") +# commit 981d8201a7239b02114489529fd366c4c576a146 +# ("GH #14 - Handle CRLF emails properly.") +# commit 2338d93598b5e8432df24bda8dfdc231bdeb666e +# ("GH #14 - Support multipart messages without content-type in subparts.") +# +# For Email::MIME >= 1.923 && < 1.935, +# commit dcef9be66c49ae89c7a5027a789bbbac544499ce +# ("removing all trailing newlines was too much") +# is also included +package PublicInbox::MIME; +use strict; +use warnings; +use base qw(Email::MIME); + +if ($Email::MIME::VERSION <= 1.937) { +sub parts_multipart { + my $self = shift; + my $boundary = $self->{ct}->{attributes}->{boundary}; + + # Take a message, join all its lines together. Now try to Email::MIME->new + # it with 1.861 or earlier. Death! It tries to recurse endlessly on the + # body, because every time it splits on boundary it gets itself. Obviously + # that means it's a bogus message, but a mangled result (or exception) is + # better than endless recursion. -- rjbs, 2008-01-07 + return $self->parts_single_part + unless $boundary and $self->body_raw =~ /^--\Q$boundary\E\s*$/sm; + + $self->{body_raw} = $self->SUPER::body; + + # rfc1521 7.2.1 + my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2; + + # Split on boundaries, but keep blank lines after them intact + my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || ''); + + $self->SUPER::body_set(undef); + + # If there are no headers in the potential MIME part, it's just part of the + # body. This is a horrible hack, although it's debatable whether it was + # better or worse when it was $self->{body} = shift @bits ... 
-- rjbs, + # 2006-11-27 + $self->SUPER::body_set(shift @bits) if ($bits[0] || '') !~ /.*:.*/; + + my $bits = @bits; + + my @parts; + for my $bit (@bits) { + # Parts don't need headers. If they don't have them, they look like this: + # + # --90e6ba6e8d06f1723604fc1b809a + # + # Part 2 + # + # Part 2a + # + # $bit will contain two new lines before Part 2. + # + # Anything with headers will only have one new line. + # + # RFC 1341 Section 7.2 says parts without headers are to be considered + # plain US-ASCII text. -- alh + # 2016-08-01 + my $added_header; + + if ($bit =~ /^(?:$self->{mycrlf}){2}/) { + $bit = "Content-type: text/plain; charset=us-ascii" . $bit; + + $added_header = 1; + } + + $bit =~ s/\A[\n\r]+//smg; + $bit =~ s/(?{mycrlf}\Z//sm; + + my $email = (ref $self)->new($bit); + + if ($added_header) { + # Remove our changes so we don't change the raw email content + $email->header_str_set('Content-Type'); + } + + push @parts, $email; + } + + $self->{parts} = \@parts; + + return @{ $self->{parts} }; +} +} + +1; diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm index ef0d209f..5be06a44 100644 --- a/lib/PublicInbox/MsgIter.pm +++ b/lib/PublicInbox/MsgIter.pm @@ -6,17 +6,7 @@ use strict; use warnings; use base qw(Exporter); our @EXPORT = qw(msg_iter); -use Email::MIME; -use Scalar::Util qw(readonly); - -# Workaround Email::MIME versions without -# commit dcef9be66c49ae89c7a5027a789bbbac544499ce -# ("removing all trailing newlines was too much") -# This is necessary for Debian jessie -my $bad = 1.923; -my $good = 1.935; -my $ver = $Email::MIME::VERSION; -my $extra_nl = 1 if ($ver >= $bad && $ver < $good); +use PublicInbox::MIME; # Like Email::MIME::walk_parts, but this is: # * non-recursive @@ -36,16 +26,6 @@ sub msg_iter ($$) { @sub = map { [ $_, $depth, @idx, ++$i ] } @sub; @parts = (@sub, @parts); } else { - if ($extra_nl) { - my $lf = $part->{mycrlf}; - my $bref = $part->{body}; - if (readonly($$bref)) { - my $s = $$bref . 
$lf; - $part->{body} = \$s; - } else { - $$bref .= $lf; - } - } $cb->($p); } } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index a1bae419..c9094245 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -16,7 +16,7 @@ use constant YYYYMMDD => 4; # for searching in the WWW UI use Search::Xapian qw/:standard/; use PublicInbox::SearchMsg; -use Email::MIME; +use PublicInbox::MIME; use PublicInbox::MID qw/mid_clean id_compress/; # This is English-only, everything else is non-standard and may be confused as diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 87ee0d46..d63dd7c7 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -10,7 +10,7 @@ package PublicInbox::SearchIdx; use strict; use warnings; use Fcntl qw(:flock :DEFAULT); -use Email::MIME; +use PublicInbox::MIME; use Email::MIME::ContentType; $Email::MIME::ContentType::STRICT_PARAMS = 0; use base qw(PublicInbox::Search); @@ -400,7 +400,7 @@ sub do_cat_mail { my $str = $git->cat_file($blob, $sizeref); # fixup bugs from import: $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; - Email::MIME->new($str); + PublicInbox::MIME->new($str); }; $@ ? 
undef : $mime; } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index bd634d8d..ccc53abf 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -10,7 +10,7 @@ use PublicInbox::Hval qw/ascii_html/; use PublicInbox::View; use PublicInbox::WwwAtomStream; use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape); -use Email::MIME; +use PublicInbox::MIME; require PublicInbox::Git; require PublicInbox::SearchThread; our $LIM = 50; @@ -205,7 +205,7 @@ sub mset_thread { $mime = $inbox->msg_by_smsg($mime) and last; } if ($mime) { - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); return PublicInbox::View::index_entry($mime, $ctx, scalar @$msgs); } @@ -239,7 +239,7 @@ sub adump { while (my $x = shift @items) { $x = load_doc_retry($srch, $x); $x = $ibx->msg_by_smsg($x) and - return Email::MIME->new($x); + return PublicInbox::MIME->new($x); } return undef; }); diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index e4e9d7d2..2c37cd42 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -299,7 +299,7 @@ sub stream_thread ($$) { } return missing_thread($ctx) unless $mime; - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); $ctx->{-title_html} = ascii_html($mime->header('Subject')); $ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime); PublicInbox::WwwStream->response($ctx, 200, sub { @@ -311,7 +311,7 @@ sub stream_thread ($$) { unshift @q, map { ($cl, $_) } @{$node->{children}}; my $mid = $node->{id}; if ($mime = $inbox->msg_by_smsg($node->{smsg})) { - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); return thread_index_entry($ctx, $level, $mime); } else { return ghost_index_entry($ctx, $level, $node); @@ -362,7 +362,7 @@ sub thread_html { $mime = $inbox->msg_by_smsg($mime) and last; } return missing_thread($ctx) unless $mime; - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); 
$ctx->{-title_html} = ascii_html($mime->header('Subject')); $ctx->{-html_tip} = '
'.index_entry($mime, $ctx, scalar @$msgs);
 	$mime = undef;
@@ -372,7 +372,7 @@ sub thread_html {
 			$mime = $inbox->msg_by_smsg($mime) and last;
 		}
 		if ($mime) {
-			$mime = Email::MIME->new($mime);
+			$mime = PublicInbox::MIME->new($mime);
 			return index_entry($mime, $ctx, scalar @$msgs);
 		}
 		$msgs = undef;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 11fc92e9..430e6b19 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -113,7 +113,7 @@ sub preload {
 	require PublicInbox::Feed;
 	require PublicInbox::View;
 	require PublicInbox::SearchThread;
-	require Email::MIME;
+	require PublicInbox::MIME;
 	require Digest::SHA;
 	require POSIX;
 
@@ -225,8 +225,8 @@ sub get_mid_html {
 	my $x = mid2blob($ctx) or return r404($ctx);
 
 	require PublicInbox::View;
-	require Email::MIME;
-	my $mime = Email::MIME->new($x);
+	require PublicInbox::MIME;
+	my $mime = PublicInbox::MIME->new($x);
 	searcher($ctx);
 	PublicInbox::View::msg_html($ctx, $mime);
 }
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index b7c2d17a..d08f2297 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -6,7 +6,7 @@
 package PublicInbox::WatchMaildir;
 use strict;
 use warnings;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
 use PublicInbox::Git;
@@ -207,7 +207,7 @@ sub _path_to_mime {
 		local $/;
 		my $str = <$fh>;
 		$str or return;
-		return Email::MIME->new(\$str);
+		return PublicInbox::MIME->new(\$str);
 	} elsif ($!{ENOENT}) {
 		return;
 	} else {
@@ -247,7 +247,7 @@ sub _spamcheck_cb {
 		my ($mime) = @_;
 		my $tmp = '';
 		if ($sc->spamcheck($mime, \$tmp)) {
-			return Email::MIME->new(\$tmp);
+			return PublicInbox::MIME->new(\$tmp);
 		}
 		warn $mime->header('Message-ID')." failed spam check\n";
 		undef;
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index 33bfce27..a5ba5b2b 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -5,7 +5,7 @@
 package PublicInbox::WwwAttach; # internal package
 use strict;
 use warnings;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType qw(parse_content_type);
 $Email::MIME::ContentType::STRICT_PARAMS = 0;
 use PublicInbox::MsgIter;
@@ -15,7 +15,7 @@ sub get_attach ($$$) {
 	my ($ctx, $idx, $fn) = @_;
 	my $res = [ 404, [ 'Content-Type', 'text/plain' ], [ "Not found\n" ] ];
 	my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res;
-	$mime = Email::MIME->new($mime);
+	$mime = PublicInbox::MIME->new($mime);
 	msg_iter($mime, sub {
 		my ($part, $depth, @idx) = @{$_[0]};
 		return if join('.', @idx) ne $idx;
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 396ab489..a696d348 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -10,7 +10,7 @@ use warnings;
 use PublicInbox::Config;
 use PublicInbox::Git;
 use PublicInbox::Import;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
 use PublicInbox::Address;
@@ -23,7 +23,7 @@ if ($train !~ /\A(?:ham|spam)\z/) {
 my $spamc = PublicInbox::Spamcheck::Spamc->new;
 my $pi_config = PublicInbox::Config->new;
 my $err;
-my $mime = Email::MIME->new(eval {
+my $mime = PublicInbox::MIME->new(eval {
 	local $/;
 	my $data = scalar <STDIN>;
 	$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 8b5258f5..60f817dc 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -52,7 +52,7 @@ $simple = undef;
 $emm = PublicInbox::Emergency->new($emergency);
 $emm->prepare(\$str);
 $ems = $ems->abort;
-my $mime = Email::MIME->new(\$str);
+my $mime = PublicInbox::MIME->new(\$str);
 $str = '';
 do_exit(0) unless $spam_ok;
 
diff --git a/t/import.t b/t/import.t
index 73f92adb..29d00b05 100644
--- a/t/import.t
+++ b/t/import.t
@@ -3,7 +3,7 @@
 use strict;
 use warnings;
 use Test::More;
-use Email::MIME;
+use PublicInbox::MIME;
 use PublicInbox::Git;
 use PublicInbox::Import;
 use File::Temp qw/tempdir/;
@@ -13,7 +13,7 @@ is(system(qw(git init -q --bare), $dir), 0, 'git init successful');
 my $git = PublicInbox::Git->new($dir);
 
 my $im = PublicInbox::Import->new($git, 'testbox', 'test@example');
-my $mime = Email::MIME->create(
+my $mime = PublicInbox::MIME->create(
 	header => [
 		From => 'a@example.com',
 		To => 'b@example.com',
@@ -50,7 +50,7 @@ $im->done;
 is(scalar @revs, 26, '26 revisions exist after mass import');
 my ($mark, $msg) = $im->remove($mime);
 like($mark, qr/\A:\d+\z/, 'got mark');
-is(ref($msg), 'Email::MIME', 'got old message deleted');
+is(ref($msg), 'PublicInbox::MIME', 'got old message deleted');
 
 is(undef, $im->remove($mime), 'remove is idempotent');
 
diff --git a/t/mime.t b/t/mime.t
new file mode 100644
index 00000000..cd3303d1
--- /dev/null
+++ b/t/mime.t
@@ -0,0 +1,57 @@
+# Copyright (C) 2017 all contributors 
+# This library is free software; you can redistribute it and/or modify
+# it under the same terms as Perl itself.
+# Artistic or GPL-1+ 
+use strict;
+use warnings;
+use Test::More;
+use_ok 'PublicInbox::MIME';
+
+my $msg = PublicInbox::MIME->new(
+'From:   Richard Hansen 
+To:     git@vger.kernel.org
+Cc:     Richard Hansen 
+Subject: [PATCH 0/2] minor diff orderfile documentation improvements
+Date:   Mon,  9 Jan 2017 19:40:29 -0500
+Message-Id: <20170110004031.57985-1-hansenr@google.com>
+X-Mailer: git-send-email 2.11.0.390.gc69c2f50cf-goog
+Content-Type: multipart/signed; protocol="application/pkcs7-signature"; micalg=sha-256;
+        boundary="94eb2c0bc864b76ba30545b2bca9"
+
+--94eb2c0bc864b76ba30545b2bca9
+
+Richard Hansen (2):
+  diff: document behavior of relative diff.orderFile
+  diff: document the pattern format for diff.orderFile
+
+ Documentation/diff-config.txt  | 5 ++++-
+ Documentation/diff-options.txt | 3 ++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+
+--94eb2c0bc864b76ba30545b2bca9
+Content-Type: application/pkcs7-signature; name="smime.p7s"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="smime.p7s"
+Content-Description: (truncated) S/MIME Cryptographic Signature
+
+dkTlB69771K2eXK4LcHSH/2LqX+VYa3K44vrx1ruzjXdNWzIpKBy0weFNiwnJCGofvCysM2RCSI1
+--94eb2c0bc864b76ba30545b2bca9--
+
+');
+
+my @parts = $msg->parts;
+my $exp = 'Richard Hansen (2):
+  diff: document behavior of relative diff.orderFile
+  diff: document the pattern format for diff.orderFile
+
+ Documentation/diff-config.txt  | 5 ++++-
+ Documentation/diff-options.txt | 3 ++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+';
+
+ok($msg->isa('Email::MIME'), 'compatible with Email::MIME');
+is($parts[0]->body, $exp, 'body matches expected');
+
+done_testing();
-- 
cgit v1.2.3-24-ge0c7


From 1975aeaebbbdd628849964de42e183d04240c4e0 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 11 Jan 2017 10:13:00 +0000
Subject: inbox: reinstate periodic cleanup of Xapian and SQLite objects

We may need to do this even more aggressively, since the
Xapian database does not always give the latest results.
This time, we'll do it without relying on weak references,
and instead check refcounts.
---
 lib/PublicInbox/Inbox.pm | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 51ada0bc..a0d69f18 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -7,6 +7,7 @@ use strict;
 use warnings;
 use PublicInbox::Git;
 use PublicInbox::MID qw(mid2path);
+use Devel::Peek qw(SvREFCNT);
 
 my $cleanup_timer;
 eval {
@@ -18,10 +19,20 @@ eval {
 my $CLEANUP = {}; # string(inbox) -> inbox
 sub cleanup_task () {
 	$cleanup_timer = undef;
-	delete $_->{git} for values %$CLEANUP;
+	for my $ibx (values %$CLEANUP) {
+		foreach my $f (qw(git mm search)) {
+			delete $ibx->{$f} if SvREFCNT($ibx->{$f}) == 1;
+		}
+	}
 	$CLEANUP = {};
 }
 
+sub _cleanup_later ($) {
+	my ($self) = @_;
+	$cleanup_timer ||= PublicInbox::EvCleanup::later(*cleanup_task);
+	$CLEANUP->{"$self"} = $self;
+}
+
 sub _set_uint ($$$) {
 	my ($opts, $field, $default) = @_;
 	my $val = $opts->{$field};
@@ -70,20 +81,23 @@ sub git {
 	$self->{git} ||= eval {
 		my $g = PublicInbox::Git->new($self->{mainrepo});
 		$g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
-		$cleanup_timer ||= PublicInbox::EvCleanup::later(*cleanup_task);
-		$CLEANUP->{"$self"} = $self;
+		_cleanup_later($self);
 		$g;
 	};
 }
 
 sub mm {
 	my ($self) = @_;
-	$self->{mm} ||= eval { PublicInbox::Msgmap->new($self->{mainrepo}) };
+	$self->{mm} ||= eval {
+		_cleanup_later($self);
+		PublicInbox::Msgmap->new($self->{mainrepo});
+	};
 }
 
 sub search {
 	my ($self) = @_;
 	$self->{search} ||= eval {
+		_cleanup_later($self);
 		PublicInbox::Search->new($self->{mainrepo}, $self->{altid});
 	};
 }
-- 
cgit v1.2.3-24-ge0c7


From b6f6d1e1408ebf9ad71b9c912a82e3f92ed5d52c Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 18 Jan 2017 23:50:57 +0000
Subject: mime: avoid SUPER usage in Email::MIME subclass

We must call Email::Simple methods directly in our monkey patch
for Email::MIME to call the intended method.  Using SUPER in our
subclass would instead hit a different, unintended method in
Email::MIME.

Reported-by: Junio C Hamano 
	
---
 lib/PublicInbox/MIME.pm |  6 ++---
 t/mime.t                | 60 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/MIME.pm b/lib/PublicInbox/MIME.pm
index 792fffd6..54925a85 100644
--- a/lib/PublicInbox/MIME.pm
+++ b/lib/PublicInbox/MIME.pm
@@ -37,7 +37,7 @@ sub parts_multipart {
   return $self->parts_single_part
     unless $boundary and $self->body_raw =~ /^--\Q$boundary\E\s*$/sm;
 
-  $self->{body_raw} = $self->SUPER::body;
+  $self->{body_raw} = Email::Simple::body($self);
 
   # rfc1521 7.2.1
   my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2;
@@ -45,13 +45,13 @@ sub parts_multipart {
   # Split on boundaries, but keep blank lines after them intact
   my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || '');
 
-  $self->SUPER::body_set(undef);
+  Email::Simple::body_set($self, undef);
 
   # If there are no headers in the potential MIME part, it's just part of the
   # body.  This is a horrible hack, although it's debatable whether it was
   # better or worse when it was $self->{body} = shift @bits ... -- rjbs,
   # 2006-11-27
-  $self->SUPER::body_set(shift @bits) if ($bits[0] || '') !~ /.*:.*/;
+  Email::Simple::body_set($self, shift @bits) if ($bits[0] || '') !~ /.*:.*/;
 
   my $bits = @bits;
 
diff --git a/t/mime.t b/t/mime.t
index cd3303d1..c4bdcf0d 100644
--- a/t/mime.t
+++ b/t/mime.t
@@ -6,6 +6,7 @@ use strict;
 use warnings;
 use Test::More;
 use_ok 'PublicInbox::MIME';
+use PublicInbox::MsgIter;
 
 my $msg = PublicInbox::MIME->new(
 'From:   Richard Hansen 
@@ -54,4 +55,63 @@ my $exp = 'Richard Hansen (2):
 ok($msg->isa('Email::MIME'), 'compatible with Email::MIME');
 is($parts[0]->body, $exp, 'body matches expected');
 
+
+my $raw = q^Date:   Wed, 18 Jan 2017 13:28:32 -0500
+From:   Santiago Torres 
+To:     Junio C Hamano 
+Cc:     git@vger.kernel.org, peff@peff.net, sunshine@sunshineco.com,
+        walters@verbum.org, Lukas Puehringer 
+Subject: Re: [PATCH v6 4/6] builtin/tag: add --format argument for tag -v
+Message-ID: <20170118182831.pkhqu2np3bh2puei@LykOS.localdomain>
+References: <20170117233723.23897-1-santiago@nyu.edu>
+ <20170117233723.23897-5-santiago@nyu.edu>
+ 
+ 
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha256;
+        protocol="application/pgp-signature"; boundary="r24xguofrazenjwe"
+Content-Disposition: inline
+In-Reply-To: 
+
+
+--r24xguofrazenjwe
+Content-Type: text/plain; charset=us-ascii
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+your tree directly?=20
+
+--r24xguofrazenjwe
+Content-Type: application/pgp-signature; name="signature.asc"
+
+-----BEGIN PGP SIGNATURE-----
+
+=7wIb
+-----END PGP SIGNATURE-----
+
+--r24xguofrazenjwe--
+
+^;
+
+$msg = PublicInbox::MIME->new($raw);
+my $nr = 0;
+msg_iter($msg, sub {
+	my ($part, $level, @ex) = @{$_[0]};
+	if ($ex[0] == 1) {
+		is($part->body_str, "your tree directly? \r\n", 'body OK');
+	} elsif ($ex[0] == 2) {
+		is($part->body, "-----BEGIN PGP SIGNATURE-----\n\n" .
+				"=7wIb\n" .
+				"-----END PGP SIGNATURE-----\n",
+			'sig "matches"');
+	} else {
+		fail "unexpected part\n";
+	}
+	$nr++;
+});
+
+is($nr, 2, 'got 2 parts');
+is($msg->as_string, $raw,
+	'stringified sufficiently close to original');
+
 done_testing();
-- 
cgit v1.2.3-24-ge0c7


From 24d417a8943d6ddac8f903731918c97d0f034c5b Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Thu, 19 Jan 2017 00:31:30 +0000
Subject: learn: implement "rm" only functionality

Do not consider this interface stable, but I just needed a
way to remove mis-imported multipart messages so
public-inbox-watch could pick them up again from my Maildir.
---
 script/public-inbox-learn | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index a696d348..38c83243 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -16,7 +16,7 @@ $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
 use PublicInbox::Address;
 use PublicInbox::Spamcheck::Spamc;
 my $train = shift or die "usage: $usage\n";
-if ($train !~ /\A(?:ham|spam)\z/) {
+if ($train !~ /\A(?:ham|spam|rm)\z/) {
 	die "`$train' not recognized.\nusage: $usage\n";
 }
 
@@ -27,15 +27,18 @@ my $mime = PublicInbox::MIME->new(eval {
 	local $/;
 	my $data = scalar <STDIN>;
 	$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
-	eval {
-		if ($train eq 'ham') {
-			$spamc->hamlearn(\$data);
-		} else {
-			$spamc->spamlearn(\$data);
-		}
-		die "spamc failed with: $?\n" if $?;
-	};
-	$err = $@;
+
+	if ($train ne 'rm') {
+		eval {
+			if ($train eq 'ham') {
+				$spamc->hamlearn(\$data);
+			} elsif ($train eq 'spam') {
+				$spamc->spamlearn(\$data);
+			}
+			die "spamc failed with: $?\n" if $?;
+		};
+		$err = $@;
+	}
 	$data
 });
 
@@ -61,7 +64,7 @@ foreach my $recipient (keys %dests) {
 	my $email = $ENV{GIT_COMMITTER_EMAIL} || $recipient;
 	my $im = PublicInbox::Import->new($git, $name, $email);
 
-	if ($train eq "spam") {
+	if ($train eq "spam" || $train eq "rm") {
 		# This needs to be idempotent, as my inotify trainer
 		# may train for each cross-posted message, and this
 		# script already learns for every list in
-- 
cgit v1.2.3-24-ge0c7


From c265481528208a832c5731b8da597554f2a8f693 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 18 Jan 2017 19:13:09 +0000
Subject: watchmaildir: limit live importer processes

We don't want to be triggering OOM or swapping on weaker
systems when we have dozens of inboxes as potential targets.
---
 lib/PublicInbox/WatchMaildir.pm | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index d08f2297..0b284bdb 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -224,7 +224,14 @@ sub _importer_for {
 		my $addr = $inbox->{-primary_address};
 		PublicInbox::Import->new($git, $name, $addr, $inbox);
 	};
-	$self->{importers}->{"$im"} = $im;
+
+	my $importers = $self->{importers};
+	if (scalar(keys(%$importers)) > 2) {
+		delete $importers->{"$im"};
+		_done_for_now($self);
+	}
+
+	$importers->{"$im"} = $im;
 }
 
 sub _scrubber_for {
-- 
cgit v1.2.3-24-ge0c7


From a465cc132b8d1ad96dbd0f51ad6da2ce75c79568 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 25 Jan 2017 21:39:06 +0000
Subject: watchmaildir: allow arguments for filters

We'll want to allow some degree of configuration for
various mailing lists.
---
 lib/PublicInbox/WatchMaildir.pm | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 0b284bdb..1823c248 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -238,11 +238,17 @@ sub _scrubber_for {
 	my ($inbox) = @_;
 	my $f = $inbox->{filter};
 	if ($f && $f =~ /::/) {
+		my @args;
+		# basic line splitting, only
+		# Perhaps we can have proper quote splitting one day...
+		($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
+
 		eval "require $f";
 		if ($@) {
 			warn $@;
 		} else {
-			return $f->new;
+			# e.g: PublicInbox::Filter::Vger->new(@args)
+			return $f->new(@args);
 		}
 	}
 	undef;
-- 
cgit v1.2.3-24-ge0c7


From 7e40887e8d2bef4126b4a3680594860a3b2fd67c Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Thu, 26 Jan 2017 02:09:36 +0000
Subject: add filter for Subject: tags

Some mailing lists add annoying tags to the Subject line, which
discourages readers from doing proper mail organization on the
client side.  The tags also waste precious screen space and
attention span.

Remove them from our archives to reduce clutter.
---
 MANIFEST                             |  2 ++
 lib/PublicInbox/Filter/SubjectTag.pm | 33 +++++++++++++++++++++++++++++++++
 t/filter_subjecttag.t                | 27 +++++++++++++++++++++++++++
 3 files changed, 62 insertions(+)
 create mode 100644 lib/PublicInbox/Filter/SubjectTag.pm
 create mode 100644 t/filter_subjecttag.t

diff --git a/MANIFEST b/MANIFEST
index 76fd1da6..f16843a9 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -51,6 +51,7 @@ lib/PublicInbox/ExtMsg.pm
 lib/PublicInbox/Feed.pm
 lib/PublicInbox/Filter/Base.pm
 lib/PublicInbox/Filter/Mirror.pm
+lib/PublicInbox/Filter/SubjectTag.pm
 lib/PublicInbox/Filter/Vger.pm
 lib/PublicInbox/GetlineBody.pm
 lib/PublicInbox/Git.pm
@@ -127,6 +128,7 @@ t/fail-bin/spamc
 t/feed.t
 t/filter_base.t
 t/filter_mirror.t
+t/filter_subjecttag.t
 t/filter_vger.t
 t/git-http-backend.psgi
 t/git-http-backend.t
diff --git a/lib/PublicInbox/Filter/SubjectTag.pm b/lib/PublicInbox/Filter/SubjectTag.pm
new file mode 100644
index 00000000..1d281425
--- /dev/null
+++ b/lib/PublicInbox/Filter/SubjectTag.pm
@@ -0,0 +1,33 @@
+# Copyright (C) 2017 all contributors 
+# License: AGPL-3.0+ 
+
+# Filter for various [tags] in subjects
+package PublicInbox::Filter::SubjectTag;
+use strict;
+use warnings;
+use base qw(PublicInbox::Filter::Base);
+
+sub new {
+	my ($class, %opts) = @_;
+	my $tag = delete $opts{-tag};
+	die "tag not defined!\n" unless defined $tag && $tag ne '';
+	my $self = $class->SUPER::new(%opts);
+	$self->{tag_re} = qr/\A\s*(re:\s+|)\Q$tag\E\s*/i;
+	$self;
+}
+
+sub scrub {
+	my ($self, $mime) = @_;
+	my $subj = $mime->header('Subject');
+	$subj =~ s/$self->{tag_re}/$1/; # $1 is "Re: "
+	$mime->header_str_set('Subject', $subj);
+	$self->ACCEPT($mime);
+}
+
+# no suffix/article rejection for mirrors
+sub delivery {
+	my ($self, $mime) = @_;
+	$self->scrub($mime);
+}
+
+1;
diff --git a/t/filter_subjecttag.t b/t/filter_subjecttag.t
new file mode 100644
index 00000000..54a219e7
--- /dev/null
+++ b/t/filter_subjecttag.t
@@ -0,0 +1,27 @@
+# Copyright (C) 2017 all contributors 
+# License: AGPL-3.0+ 
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use_ok 'PublicInbox::Filter::SubjectTag';
+
+my $f = eval { PublicInbox::Filter::SubjectTag->new };
+like($@, qr/tag not defined/, 'error without args');
+$f = PublicInbox::Filter::SubjectTag->new('-tag', '[foo]');
+is(ref $f, 'PublicInbox::Filter::SubjectTag', 'new object created');
+
+my $mime = Email::MIME->new(<<EOF);
+Subject: =?UTF-8?B?UmU6IFtmb29dIEVsw4PCqWFub3I=?=
+
+EOF
+
+$mime = $f->delivery($mime);
+is($mime->header('Subject'), "Re: El\xc3\xa9anor", 'filtered with Re:');
+
+$mime->header_str_set('Subject', '[FOO] bar');
+$mime = $f->delivery($mime);
+is($mime->header('Subject'), 'bar', 'filtered non-reply');
+
+done_testing();
-- 
cgit v1.2.3-24-ge0c7


From 09e5f81e8259e7deffe0973b3547a5c77fbf3fc5 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Mon, 6 Feb 2017 02:07:24 +0000
Subject: searchview: clarify numeric summary at bottom

Xapian can only give estimated results when a result limit is
given to it, so make it clear the total is an estimate, and avoid
showing nonsensical ranges when no results are returned.
---
 lib/PublicInbox/SearchView.pm | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index ccc53abf..5a95a055 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -145,15 +145,21 @@ sub search_nav_bot {
 	my $o = $q->{o};
 	my $end = $o + $nr;
 	my $beg = $o + 1;
-	my $rv = "

Results $beg-$end of $total";
+	my $rv = '

';
+	if ($beg <= $end) {
+		$rv .= "Results $beg-$end of $total";
+		$rv .= ' (estimated)' if $end != $total;
+	} else {
+		$rv .= "No more results, only $total";
+	}
 	my $n = $o + $LIM;
 
 	if ($n < $total) {
 		my $qs = $q->qs_html(o => $n);
-		$rv .= qq{, next}
+		$rv .= qq{  next}
 	}
 	if ($o > 0) {
-		$rv .= $n < $total ? '/' : ',      ';
+		$rv .= $n < $total ? '/' : '       ';
 		my $p = $o - $LIM;
 		my $qs = $q->qs_html(o => ($p > 0 ? $p : 0));
 		$rv .= qq{prev};
-- 
cgit v1.2.3-24-ge0c7


From c41c7aa95d55dcff7cb01a744bbf1bfb200e23b4 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Mon, 6 Feb 2017 02:38:37 +0000
Subject: searchview: increase limit for displaying search results

We are in no danger of excessive buffering or OOM-ing,
the main page for every inbox already loads 200 results;
and thread page views even load 1000!  Increase this to
200 for now.
---
 lib/PublicInbox/SearchView.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 5a95a055..f1c4b6a0 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -13,7 +13,7 @@ use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape);
 use PublicInbox::MIME;
 require PublicInbox::Git;
 require PublicInbox::SearchThread;
-our $LIM = 50;
+our $LIM = 200;
 
 sub noop {}
 
-- 
cgit v1.2.3-24-ge0c7


From 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Mon, 6 Feb 2017 19:54:25 +0000
Subject: searchidx: deal with empty In-Reply-To and References headers

In some messages, these headers exist, but have empty values.
Do not let empty values throw off the logic our search indexer
uses to tie threads together, as it can create nonsensical threads
grouped under a Message-Id of "" (empty string).

See

for an example of such a message.

Thanks-to: Johannes Schindelin 
  
---
 lib/PublicInbox/SearchIdx.pm | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index d63dd7c7..1142ca7a 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -292,11 +292,15 @@ sub link_message {
 	my $mime = $smsg->{mime};
 	my $hdr = $mime->header_obj;
 	my $refs = $hdr->header_raw('References');
-	my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
+	my @refs = defined $refs ? ($refs =~ /<([^>]+)>/g) : ();
 	my $irt = $hdr->header_raw('In-Reply-To');
 	if (defined $irt) {
-		$irt = mid_clean($irt);
-		$irt = undef if $mid eq $irt;
+		if ($irt eq '') {
+			$irt = undef;
+		} else {
+			$irt = mid_clean($irt);
+			$irt = undef if $mid eq $irt;
+		}
 	}
 
 	my $tid;
-- 
cgit v1.2.3-24-ge0c7


From 5d91adedf5f33ef1cb87df2a86306ddf370b4f8d Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Mon, 6 Feb 2017 21:08:13 +0000
Subject: searchidx: reindex clobbers old thread IDs

We cannot always reuse thread IDs since our threading
logic may change as bugs are fixed.
---
 lib/PublicInbox/SearchIdx.pm | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 1142ca7a..bc003c6c 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -157,6 +157,10 @@ sub add_message {
 			# it will also clobber any existing regular message
 			$doc_id = $smsg->{doc_id};
 			$old_tid = $smsg->thread_id;
+
+			# no need to remove_term for old_tid, we use a new
+			# doc to replace the old one when reindexing:
+			$old_tid = undef if $self->{reindex};
 		}
 		$smsg = PublicInbox::SearchMsg->new($mime);
 		my $doc = $smsg->{doc};
@@ -464,7 +468,7 @@ sub _git_log {
 sub _index_sync {
 	my ($self, $opts) = @_;
 	my $tip = $opts->{ref} || 'HEAD';
-	my $reindex = $opts->{reindex};
+	$self->{reindex} = $opts->{reindex};
 	my ($mkey, $last_commit, $lx, $xlog);
 	$self->{git}->batch_prepare;
 	my $xdb = _xdb_acquire($self);
@@ -474,7 +478,7 @@ sub _index_sync {
 		$mkey = 'last_commit';
 		$last_commit = $xdb->get_metadata('last_commit');
 		$lx = $last_commit;
-		if ($reindex) {
+		if ($self->{reindex}) {
 			$lx = '';
 			$mkey = undef if $last_commit ne '';
 		}
-- 
cgit v1.2.3-24-ge0c7


From 6e83825a9e49ca68694c20ddfed54368d5f3e075 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Mon, 6 Feb 2017 21:37:26 +0000
Subject: Revert "searchidx: reindex clobbers old thread IDs"

Oops, that's broken, too.  I guess the only way to reindex
after fixing the thread detection is to start from scratch.

This reverts commit 5d91adedf5f33ef1cb87df2a86306ddf370b4f8d.
---
 lib/PublicInbox/SearchIdx.pm | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index bc003c6c..1142ca7a 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -157,10 +157,6 @@ sub add_message {
 			# it will also clobber any existing regular message
 			$doc_id = $smsg->{doc_id};
 			$old_tid = $smsg->thread_id;
-
-			# no need to remove_term for old_tid, we use a new
-			# doc to replace the old one when reindexing:
-			$old_tid = undef if $self->{reindex};
 		}
 		$smsg = PublicInbox::SearchMsg->new($mime);
 		my $doc = $smsg->{doc};
@@ -468,7 +464,7 @@ sub _git_log {
 sub _index_sync {
 	my ($self, $opts) = @_;
 	my $tip = $opts->{ref} || 'HEAD';
-	$self->{reindex} = $opts->{reindex};
+	my $reindex = $opts->{reindex};
 	my ($mkey, $last_commit, $lx, $xlog);
 	$self->{git}->batch_prepare;
 	my $xdb = _xdb_acquire($self);
@@ -478,7 +474,7 @@ sub _index_sync {
 		$mkey = 'last_commit';
 		$last_commit = $xdb->get_metadata('last_commit');
 		$lx = $last_commit;
-		if ($self->{reindex}) {
+		if ($reindex) {
 			$lx = '';
 			$mkey = undef if $last_commit ne '';
 		}
-- 
cgit v1.2.3-24-ge0c7


From 4c042c7d97948a625ad5e737df941fa820b94e47 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Mon, 6 Feb 2017 21:39:45 +0000
Subject: search: schema version bump for empty References/In-Reply-To

We cannot distinguish between legitimate ghosts and mis-threaded
messages before commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0
("searchidx: deal with empty In-Reply-To and References headers")
so we must rebuild the index in parallel to fix it.
---
 lib/PublicInbox/Search.pm | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index c9094245..8c72fa17 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -39,7 +39,9 @@ use constant {
 	# 10 - optimize doc for NNTP overviews
 	# 11 - merge threads when vivifying ghosts
 	# 12 - change YYYYMMDD value column to numeric
-	SCHEMA_VERSION => 12,
+	# 13 - fix threading for empty References/In-Reply-To
+	#      (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0)
+	SCHEMA_VERSION => 13,
 
 	# n.b. FLAG_PURE_NOT is expensive not suitable for a public website
 	# as it could become a denial-of-service vector
-- 
cgit v1.2.3-24-ge0c7


From ed3ad34bbb10edbe6ea022cb2959eb556df89d6f Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Tue, 7 Feb 2017 22:27:52 +0000
Subject: TODO: several updates

Always plenty to do while working on this...
---
 TODO | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/TODO b/TODO
index b85887ad..3163b8a8 100644
--- a/TODO
+++ b/TODO
@@ -11,15 +11,28 @@ all need to be considered for everything we introduce)
 
 * POP3 server, since some webmail providers support external POP3:
   https://public-inbox.org/meta/20160411034104.GA7817@dcvr.yhbt.net/
+  Perhaps make this depend solely on the NNTP server and work as a proxy.
+  Meaning users can run this without needing a full copy of the
+  archives in a git repository.
+
+* HTTP and NNTP proxy support.  Allow us to be a frontend for
+  firewalled off (or Tor-exclusive) instances.  The use case is
+  for offering a publicly accessible IP with a cheap VPS,
+  yet storing large amounts of data on computers without a
+  public IP behind a home Internet connection.
 
 * TLS support for various daemons (including STARTTLS for NNTP and POP3)
 
+* NNTP COMPRESS extension (see innd)
+
 * Combined "super server" for NNTP/HTTP/POP3 to reduce memory overhead
 
 * Optional reply-to-list support for mirroring lists that want it :<
   Reply-to-list encourages the existing list as a single-point-of-failure,
   but having an extra mirror using public-inbox code is nice regardless.
 
+* Optional reply-to-nobody for dead lists.
+
 * Configurable linkification for per-inbox shorthands:
   "$gmane/123456" could be configured to expand to the
   appropriate link pointing to the gmane.org list archives,
@@ -36,7 +49,7 @@ all need to be considered for everything we introduce)
 
 * configurable constants (index limits, search results)
 
-* handle messages with multiple Message-IDs
+* handle messages with multiple Message-IDs (how?)
 
 * handle broken double-bracketed References properly (maybe)
   and totally broken Message-IDs
-- 
cgit v1.2.3-24-ge0c7


From ba4c50c20b95679580beba1ef290a4281d5285b7 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 8 Feb 2017 21:41:38 +0000
Subject: config: do not slurp lines into memory

There's no need to hold everything in memory here,
since apparently "foreach" will read everything at
once, as it evaluates the filehandle in list context.

(for some reason, I thought Perl5 was smart enough
 to avoid creating a temporary array, here...)
---
 lib/PublicInbox/Config.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 28b5bdb5..f6275cdd 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -111,7 +111,7 @@ sub git_config_dump {
 	my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n";
 	my %rv;
 	local $/ = "\n";
-	foreach my $line (<$fh>) {
+	while (defined(my $line = <$fh>)) {
 		chomp $line;
 		my ($k, $v) = split(/=/, $line, 2);
 		my $cur = $rv{$k};
-- 
cgit v1.2.3-24-ge0c7