about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2017-02-09 00:43:02 +0000
committer Eric Wong <e@80x24.org> 2017-02-09 00:43:02 +0000
commit fb9ed5324ec7de9420956840ba9a6585b81e8231 (patch)
tree ab6b34f8459a585205a25d1f3d18d31437b89052
parent 1ab8dabe04ebba61fd8761dca3d569947cbe20be (diff)
parent ba4c50c20b95679580beba1ef290a4281d5285b7 (diff)
download public-inbox-fb9ed5324ec7de9420956840ba9a6585b81e8231.tar.gz
* origin/master:
  config: do not slurp lines into memory
  TODO: several updates
  search: schema version bump for empty References/In-Reply-To
  Revert "searchidx: reindex clobbers old thread IDs"
  searchidx: reindex clobbers old thread IDs
  searchidx: deal with empty In-Reply-To and References headers
  searchview: increase limit for displaying search results
  searchview: clarify numeric summary at bottom
  add filter for Subject: tags
  watchmaildir: allow arguments for filters
  watchmaildir: limit live importer processes
  learn: implement "rm" only functionality
  mime: avoid SUPER usage in Email::MIME subclass
  inbox: reinstate periodic cleanup of Xapian and SQLite objects
  introduce PublicInbox::MIME wrapper class
-rw-r--r-- MANIFEST 4
-rw-r--r-- TODO 15
-rw-r--r-- lib/PublicInbox/Config.pm 2
-rw-r--r-- lib/PublicInbox/Feed.pm 6
-rw-r--r-- lib/PublicInbox/Filter/SubjectTag.pm 33
-rw-r--r-- lib/PublicInbox/Filter/Vger.pm 2
-rw-r--r-- lib/PublicInbox/Import.pm 2
-rw-r--r-- lib/PublicInbox/Inbox.pm 22
-rw-r--r-- lib/PublicInbox/MIME.pm 102
-rw-r--r-- lib/PublicInbox/MsgIter.pm 22
-rw-r--r-- lib/PublicInbox/Search.pm 6
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 14
-rw-r--r-- lib/PublicInbox/SearchView.pm 20
-rw-r--r-- lib/PublicInbox/View.pm 8
-rw-r--r-- lib/PublicInbox/WWW.pm 6
-rw-r--r-- lib/PublicInbox/WatchMaildir.pm 23
-rw-r--r-- lib/PublicInbox/WwwAttach.pm 4
-rwxr-xr-x script/public-inbox-learn 29
-rwxr-xr-x script/public-inbox-mda 2
-rw-r--r-- t/filter_subjecttag.t 27
-rw-r--r-- t/import.t 6
-rw-r--r-- t/mime.t 117
22 files changed, 395 insertions, 77 deletions
diff --git a/MANIFEST b/MANIFEST
index f235dc67..d0223c65 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -52,6 +52,7 @@ lib/PublicInbox/ExtMsg.pm
 lib/PublicInbox/Feed.pm
 lib/PublicInbox/Filter/Base.pm
 lib/PublicInbox/Filter/Mirror.pm
+lib/PublicInbox/Filter/SubjectTag.pm
 lib/PublicInbox/Filter/Vger.pm
 lib/PublicInbox/GetlineBody.pm
 lib/PublicInbox/Git.pm
@@ -69,6 +70,7 @@ lib/PublicInbox/Linkify.pm
 lib/PublicInbox/Listener.pm
 lib/PublicInbox/MDA.pm
 lib/PublicInbox/MID.pm
+lib/PublicInbox/MIME.pm
 lib/PublicInbox/Mbox.pm
 lib/PublicInbox/MsgIter.pm
 lib/PublicInbox/Msgmap.pm
@@ -152,6 +154,7 @@ t/fail-bin/spamc
 t/feed.t
 t/filter_base.t
 t/filter_mirror.t
+t/filter_subjecttag.t
 t/filter_vger.t
 t/git-http-backend.psgi
 t/git-http-backend.t
@@ -172,6 +175,7 @@ t/linkify.t
 t/main-bin/spamc
 t/mda.t
 t/mid.t
+t/mime.t
 t/msg_iter.t
 t/msgmap.t
 t/nntp.t
diff --git a/TODO b/TODO
index b85887ad..3163b8a8 100644
--- a/TODO
+++ b/TODO
@@ -11,15 +11,28 @@ all need to be considered for everything we introduce)
 
 * POP3 server, since some webmail providers support external POP3:
   https://public-inbox.org/meta/20160411034104.GA7817@dcvr.yhbt.net/
+  Perhaps make this depend solely on the NNTP server and work as a proxy,
+  meaning users can run this without needing a full copy of the
+  archives in a git repository.
+
+* HTTP and NNTP proxy support.  Allow us to be a frontend for
+  firewalled off (or Tor-exclusive) instances.  The use case is
+  for offering a publicly accessible IP with a cheap VPS,
+  yet storing large amounts of data on computers without a
+  public IP behind a home Internet connection.
 
 * TLS support for various daemons (including STARTTLS for NNTP and POP3)
 
+* NNTP COMPRESS extension (see innd)
+
 * Combined "super server" for NNTP/HTTP/POP3 to reduce memory overhead
 
 * Optional reply-to-list support for mirroring lists that want it :<
   Reply-to-list encourages the existing list as a single-point-of-failure,
   but having an extra mirror using public-inbox code is nice regardless.
 
+* Optional reply-to-nobody for dead lists.
+
 * Configurable linkification for per-inbox shorthands:
   "$gmane/123456" could be configured to expand to the
   appropriate link pointing to the gmane.org list archives,
@@ -36,7 +49,7 @@ all need to be considered for everything we introduce)
 
 * configurable constants (index limits, search results)
 
-* handle messages with multiple Message-IDs
+* handle messages with multiple Message-IDs (how?)
 
 * handle broken double-bracketed References properly (maybe)
   and totally broken Message-IDs
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 28b5bdb5..f6275cdd 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -111,7 +111,7 @@ sub git_config_dump {
         my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n";
         my %rv;
         local $/ = "\n";
-        foreach my $line (<$fh>) {
+        while (defined(my $line = <$fh>)) {
                 chomp $line;
                 my ($k, $v) = split(/=/, $line, 2);
                 my $cur = $rv{$k};
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 2a33fd29..e5d57550 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -5,7 +5,7 @@
 package PublicInbox::Feed;
 use strict;
 use warnings;
-use Email::MIME;
+use PublicInbox::MIME;
 use PublicInbox::View;
 use PublicInbox::WwwAtomStream;
 
@@ -39,7 +39,7 @@ sub generate_thread_atom {
         PublicInbox::WwwAtomStream->response($ctx, 200, sub {
                 while (my $msg = shift @$msgs) {
                         $msg = $ibx->msg_by_smsg($msg) and
-                                        return Email::MIME->new($msg);
+                                return PublicInbox::MIME->new($msg);
                 }
         });
 }
@@ -175,7 +175,7 @@ sub each_recent_blob {
 sub do_cat_mail {
         my ($ibx, $path) = @_;
         my $mime = eval { $ibx->msg_by_path($path) } or return;
-        Email::MIME->new($mime);
+        PublicInbox::MIME->new($mime);
 }
 
 1;
diff --git a/lib/PublicInbox/Filter/SubjectTag.pm b/lib/PublicInbox/Filter/SubjectTag.pm
new file mode 100644
index 00000000..1d281425
--- /dev/null
+++ b/lib/PublicInbox/Filter/SubjectTag.pm
@@ -0,0 +1,33 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Filter for various [tags] in subjects
+package PublicInbox::Filter::SubjectTag;
+use strict;
+use warnings;
+use base qw(PublicInbox::Filter::Base);
+
+sub new {
+        my ($class, %opts) = @_;
+        my $tag = delete $opts{-tag};
+        die "tag not defined!\n" unless defined $tag && $tag ne '';
+        my $self = $class->SUPER::new(%opts);
+        $self->{tag_re} = qr/\A\s*(re:\s+|)\Q$tag\E\s*/i;
+        $self;
+}
+
+sub scrub {
+        my ($self, $mime) = @_;
+        my $subj = $mime->header('Subject');
+        $subj =~ s/$self->{tag_re}/$1/; # $1 is "Re: "
+        $mime->header_str_set('Subject', $subj);
+        $self->ACCEPT($mime);
+}
+
+# no suffix/article rejection for mirrors
+sub delivery {
+        my ($self, $mime) = @_;
+        $self->scrub($mime);
+}
+
+1;
diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
index 2ffed184..905f28d7 100644
--- a/lib/PublicInbox/Filter/Vger.pm
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -25,7 +25,7 @@ sub scrub {
         # so in multipart (e.g. GPG-signed) messages, the list trailer
         # becomes invisible to MIME-aware email clients.
         if ($s =~ s/$l0\n$l1\n$l2\n$l3\n($l4\n)?\z//os) {
-                $mime = Email::MIME->new(\$s);
+                $mime = PublicInbox::MIME->new(\$s);
         }
         $self->ACCEPT($mime);
 }
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 1ac112b8..13671a4f 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -119,7 +119,7 @@ sub remove {
         $n = read($r, my $lf, 1);
         defined($n) or die "read final byte of cat-blob failed: $!";
         die "bad read on final byte: <$lf>" if $lf ne "\n";
-        my $cur = Email::MIME->new($buf);
+        my $cur = PublicInbox::MIME->new($buf);
         my $cur_s = $cur->header('Subject');
         $cur_s = '' unless defined $cur_s;
         my $cur_m = $mime->header('Subject');
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 1a844e1c..999f813b 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -7,6 +7,7 @@ use strict;
 use warnings;
 use PublicInbox::Git;
 use PublicInbox::MID qw(mid2path);
+use Devel::Peek qw(SvREFCNT);
 
 my $cleanup_timer;
 eval {
@@ -18,10 +19,20 @@ eval {
 my $CLEANUP = {}; # string(inbox) -> inbox
 sub cleanup_task () {
         $cleanup_timer = undef;
-        delete $_->{git} for values %$CLEANUP;
+        for my $ibx (values %$CLEANUP) {
+                foreach my $f (qw(git mm search)) {
+                        delete $ibx->{$f} if SvREFCNT($ibx->{$f}) == 1;
+                }
+        }
         $CLEANUP = {};
 }
 
+sub _cleanup_later ($) {
+        my ($self) = @_;
+        $cleanup_timer ||= PublicInbox::EvCleanup::later(*cleanup_task);
+        $CLEANUP->{"$self"} = $self;
+}
+
 sub _set_uint ($$$) {
         my ($opts, $field, $default) = @_;
         my $val = $opts->{$field};
@@ -70,20 +81,23 @@ sub git {
         $self->{git} ||= eval {
                 my $g = PublicInbox::Git->new($self->{mainrepo});
                 $g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
-                $cleanup_timer ||= PublicInbox::EvCleanup::later(*cleanup_task);
-                $CLEANUP->{"$self"} = $self;
+                _cleanup_later($self);
                 $g;
         };
 }
 
 sub mm {
         my ($self) = @_;
-        $self->{mm} ||= eval { PublicInbox::Msgmap->new($self->{mainrepo}) };
+        $self->{mm} ||= eval {
+                _cleanup_later($self);
+                PublicInbox::Msgmap->new($self->{mainrepo});
+        };
 }
 
 sub search {
         my ($self) = @_;
         $self->{search} ||= eval {
+                _cleanup_later($self);
                 PublicInbox::Search->new($self->{mainrepo}, $self->{altid});
         };
 }
diff --git a/lib/PublicInbox/MIME.pm b/lib/PublicInbox/MIME.pm
new file mode 100644
index 00000000..54925a85
--- /dev/null
+++ b/lib/PublicInbox/MIME.pm
@@ -0,0 +1,102 @@
+# This library is free software; you can redistribute it and/or modify
+# it under the same terms as Perl itself.
+#
+# The license for this file differs from the rest of public-inbox.
+#
+# It monkey patches the "parts_multipart" subroutine with patches
+# from Matthew Horsfall <wolfsage@gmail.com> at:
+#
+# git clone --mirror https://github.com/rjbs/Email-MIME.git refs/pull/28/head
+#
+# commit fe0eb870ab732507aa39a1070a2fd9435c7e4877
+# ("Make sure we don't modify the body of a message when injecting a header.")
+# commit 981d8201a7239b02114489529fd366c4c576a146
+# ("GH #14 - Handle CRLF emails properly.")
+# commit 2338d93598b5e8432df24bda8dfdc231bdeb666e
+# ("GH #14 - Support multipart messages without content-type in subparts.")
+#
+# For Email::MIME >= 1.923 && < 1.935,
+# commit dcef9be66c49ae89c7a5027a789bbbac544499ce
+# ("removing all trailing newlines was too much")
+# is also included
+package PublicInbox::MIME;
+use strict;
+use warnings;
+use base qw(Email::MIME);
+
+if ($Email::MIME::VERSION <= 1.937) {
+sub parts_multipart {
+  my $self     = shift;
+  my $boundary = $self->{ct}->{attributes}->{boundary};
+
+  # Take a message, join all its lines together.  Now try to Email::MIME->new
+  # it with 1.861 or earlier.  Death!  It tries to recurse endlessly on the
+  # body, because every time it splits on boundary it gets itself. Obviously
+  # that means it's a bogus message, but a mangled result (or exception) is
+  # better than endless recursion. -- rjbs, 2008-01-07
+  return $self->parts_single_part
+    unless $boundary and $self->body_raw =~ /^--\Q$boundary\E\s*$/sm;
+
+  $self->{body_raw} = Email::Simple::body($self);
+
+  # rfc1521 7.2.1
+  my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2;
+
+  # Split on boundaries, but keep blank lines after them intact
+  my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || '');
+
+  Email::Simple::body_set($self, undef);
+
+  # If there are no headers in the potential MIME part, it's just part of the
+  # body.  This is a horrible hack, although it's debatable whether it was
+  # better or worse when it was $self->{body} = shift @bits ... -- rjbs,
+  # 2006-11-27
+  Email::Simple::body_set($self, shift @bits) if ($bits[0] || '') !~ /.*:.*/;
+
+  my $bits = @bits;
+
+  my @parts;
+  for my $bit (@bits) {
+    # Parts don't need headers. If they don't have them, they look like this:
+    #
+    #   --90e6ba6e8d06f1723604fc1b809a
+    #
+    #   Part 2
+    #
+    #   Part 2a
+    #
+    # $bit will contain two new lines before Part 2.
+    #
+    # Anything with headers will only have one new line.
+    #
+    # RFC 1341 Section 7.2 says parts without headers are to be considered
+    # plain US-ASCII text. -- alh
+    # 2016-08-01
+    my $added_header;
+
+    if ($bit =~ /^(?:$self->{mycrlf}){2}/) {
+      $bit = "Content-type: text/plain; charset=us-ascii" . $bit;
+
+      $added_header = 1;
+    }
+
+    $bit =~ s/\A[\n\r]+//smg;
+    $bit =~ s/(?<!\x0d)$self->{mycrlf}\Z//sm;
+
+    my $email = (ref $self)->new($bit);
+
+    if ($added_header) {
+      # Remove our changes so we don't change the raw email content
+      $email->header_str_set('Content-Type');
+    }
+
+    push @parts, $email;
+  }
+
+  $self->{parts} = \@parts;
+
+  return @{ $self->{parts} };
+}
+}
+
+1;
diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index ef0d209f..5be06a44 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -6,17 +6,7 @@ use strict;
 use warnings;
 use base qw(Exporter);
 our @EXPORT = qw(msg_iter);
-use Email::MIME;
-use Scalar::Util qw(readonly);
-
-# Workaround Email::MIME versions without
-# commit dcef9be66c49ae89c7a5027a789bbbac544499ce
-# ("removing all trailing newlines was too much")
-# This is necessary for Debian jessie
-my $bad = 1.923;
-my $good = 1.935;
-my $ver = $Email::MIME::VERSION;
-my $extra_nl = 1 if ($ver >= $bad && $ver < $good);
+use PublicInbox::MIME;
 
 # Like Email::MIME::walk_parts, but this is:
 # * non-recursive
@@ -36,16 +26,6 @@ sub msg_iter ($$) {
                                 @sub = map { [ $_, $depth, @idx, ++$i ] } @sub;
                                 @parts = (@sub, @parts);
                         } else {
-                                if ($extra_nl) {
-                                        my $lf = $part->{mycrlf};
-                                        my $bref = $part->{body};
-                                        if (readonly($$bref)) {
-                                                my $s = $$bref . $lf;
-                                                $part->{body} = \$s;
-                                        } else {
-                                                $$bref .= $lf;
-                                        }
-                                }
                                 $cb->($p);
                         }
                 }
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index a1bae419..8c72fa17 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -16,7 +16,7 @@ use constant YYYYMMDD => 4; # for searching in the WWW UI
 
 use Search::Xapian qw/:standard/;
 use PublicInbox::SearchMsg;
-use Email::MIME;
+use PublicInbox::MIME;
 use PublicInbox::MID qw/mid_clean id_compress/;
 
 # This is English-only, everything else is non-standard and may be confused as
@@ -39,7 +39,9 @@ use constant {
         # 10 - optimize doc for NNTP overviews
         # 11 - merge threads when vivifying ghosts
         # 12 - change YYYYMMDD value column to numeric
-        SCHEMA_VERSION => 12,
+        # 13 - fix threading for empty References/In-Reply-To
+        #      (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0)
+        SCHEMA_VERSION => 13,
 
         # n.b. FLAG_PURE_NOT is expensive not suitable for a public website
         # as it could become a denial-of-service vector
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 5adc17d3..c0ea3c1e 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -10,7 +10,7 @@ package PublicInbox::SearchIdx;
 use strict;
 use warnings;
 use Fcntl qw(:flock :DEFAULT);
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0;
 use base qw(PublicInbox::Search);
@@ -285,11 +285,15 @@ sub link_message {
         my $mime = $smsg->{mime};
         my $hdr = $mime->header_obj;
         my $refs = $hdr->header_raw('References');
-        my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
+        my @refs = defined $refs ? ($refs =~ /<([^>]+)>/g) : ();
         my $irt = $hdr->header_raw('In-Reply-To');
         if (defined $irt) {
-                $irt = mid_clean($irt);
-                $irt = undef if $mid eq $irt;
+                if ($irt eq '') {
+                        $irt = undef;
+                } else {
+                        $irt = mid_clean($irt);
+                        $irt = undef if $mid eq $irt;
+                }
         }
 
         my $tid;
@@ -393,7 +397,7 @@ sub do_cat_mail {
                 my $str = $git->cat_file($blob, $sizeref);
                 # fixup bugs from import:
                 $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
-                Email::MIME->new($str);
+                PublicInbox::MIME->new($str);
         };
         $@ ? undef : $mime;
 }
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index bd634d8d..f1c4b6a0 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -10,10 +10,10 @@ use PublicInbox::Hval qw/ascii_html/;
 use PublicInbox::View;
 use PublicInbox::WwwAtomStream;
 use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape);
-use Email::MIME;
+use PublicInbox::MIME;
 require PublicInbox::Git;
 require PublicInbox::SearchThread;
-our $LIM = 50;
+our $LIM = 200;
 
 sub noop {}
 
@@ -145,15 +145,21 @@ sub search_nav_bot {
         my $o = $q->{o};
         my $end = $o + $nr;
         my $beg = $o + 1;
-        my $rv = "</pre><hr><pre>Results $beg-$end of $total";
+        my $rv = '</pre><hr><pre>';
+        if ($beg <= $end) {
+                $rv .= "Results $beg-$end of $total";
+                $rv .= ' (estimated)' if $end != $total;
+        } else {
+                $rv .= "No more results, only $total";
+        }
         my $n = $o + $LIM;
 
         if ($n < $total) {
                 my $qs = $q->qs_html(o => $n);
-                $rv .= qq{, <a\nhref="?$qs"\nrel=next>next</a>}
+                $rv .= qq{  <a\nhref="?$qs"\nrel=next>next</a>}
         }
         if ($o > 0) {
-                $rv .= $n < $total ? '/' : ',      ';
+                $rv .= $n < $total ? '/' : '       ';
                 my $p = $o - $LIM;
                 my $qs = $q->qs_html(o => ($p > 0 ? $p : 0));
                 $rv .= qq{<a\nhref="?$qs"\nrel=prev>prev</a>};
@@ -205,7 +211,7 @@ sub mset_thread {
                         $mime = $inbox->msg_by_smsg($mime) and last;
                 }
                 if ($mime) {
-                        $mime = Email::MIME->new($mime);
+                        $mime = PublicInbox::MIME->new($mime);
                         return PublicInbox::View::index_entry($mime, $ctx,
                                 scalar @$msgs);
                 }
@@ -239,7 +245,7 @@ sub adump {
                 while (my $x = shift @items) {
                         $x = load_doc_retry($srch, $x);
                         $x = $ibx->msg_by_smsg($x) and
-                                        return Email::MIME->new($x);
+                                        return PublicInbox::MIME->new($x);
                 }
                 return undef;
         });
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index e4e9d7d2..2c37cd42 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -299,7 +299,7 @@ sub stream_thread ($$) {
         }
         return missing_thread($ctx) unless $mime;
 
-        $mime = Email::MIME->new($mime);
+        $mime = PublicInbox::MIME->new($mime);
         $ctx->{-title_html} = ascii_html($mime->header('Subject'));
         $ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime);
         PublicInbox::WwwStream->response($ctx, 200, sub {
@@ -311,7 +311,7 @@ sub stream_thread ($$) {
                         unshift @q, map { ($cl, $_) } @{$node->{children}};
                         my $mid = $node->{id};
                         if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
-                                $mime = Email::MIME->new($mime);
+                                $mime = PublicInbox::MIME->new($mime);
                                 return thread_index_entry($ctx, $level, $mime);
                         } else {
                                 return ghost_index_entry($ctx, $level, $node);
@@ -362,7 +362,7 @@ sub thread_html {
                 $mime = $inbox->msg_by_smsg($mime) and last;
         }
         return missing_thread($ctx) unless $mime;
-        $mime = Email::MIME->new($mime);
+        $mime = PublicInbox::MIME->new($mime);
         $ctx->{-title_html} = ascii_html($mime->header('Subject'));
         $ctx->{-html_tip} = '<pre>'.index_entry($mime, $ctx, scalar @$msgs);
         $mime = undef;
@@ -372,7 +372,7 @@ sub thread_html {
                         $mime = $inbox->msg_by_smsg($mime) and last;
                 }
                 if ($mime) {
-                        $mime = Email::MIME->new($mime);
+                        $mime = PublicInbox::MIME->new($mime);
                         return index_entry($mime, $ctx, scalar @$msgs);
                 }
                 $msgs = undef;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 11fc92e9..430e6b19 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -113,7 +113,7 @@ sub preload {
         require PublicInbox::Feed;
         require PublicInbox::View;
         require PublicInbox::SearchThread;
-        require Email::MIME;
+        require PublicInbox::MIME;
         require Digest::SHA;
         require POSIX;
 
@@ -225,8 +225,8 @@ sub get_mid_html {
         my $x = mid2blob($ctx) or return r404($ctx);
 
         require PublicInbox::View;
-        require Email::MIME;
-        my $mime = Email::MIME->new($x);
+        require PublicInbox::MIME;
+        my $mime = PublicInbox::MIME->new($x);
         searcher($ctx);
         PublicInbox::View::msg_html($ctx, $mime);
 }
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index b7c2d17a..1823c248 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -6,7 +6,7 @@
 package PublicInbox::WatchMaildir;
 use strict;
 use warnings;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
 use PublicInbox::Git;
@@ -207,7 +207,7 @@ sub _path_to_mime {
                 local $/;
                 my $str = <$fh>;
                 $str or return;
-                return Email::MIME->new(\$str);
+                return PublicInbox::MIME->new(\$str);
         } elsif ($!{ENOENT}) {
                 return;
         } else {
@@ -224,18 +224,31 @@ sub _importer_for {
                 my $addr = $inbox->{-primary_address};
                 PublicInbox::Import->new($git, $name, $addr, $inbox);
         };
-        $self->{importers}->{"$im"} = $im;
+
+        my $importers = $self->{importers};
+        if (scalar(keys(%$importers)) > 2) {
+                delete $importers->{"$im"};
+                _done_for_now($self);
+        }
+
+        $importers->{"$im"} = $im;
 }
 
 sub _scrubber_for {
         my ($inbox) = @_;
         my $f = $inbox->{filter};
         if ($f && $f =~ /::/) {
+                my @args;
+                # basic line splitting, only
+                # Perhaps we can have proper quote splitting one day...
+                ($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
+
                 eval "require $f";
                 if ($@) {
                         warn $@;
                 } else {
-                        return $f->new;
+                        # e.g: PublicInbox::Filter::Vger->new(@args)
+                        return $f->new(@args);
                 }
         }
         undef;
@@ -247,7 +260,7 @@ sub _spamcheck_cb {
                 my ($mime) = @_;
                 my $tmp = '';
                 if ($sc->spamcheck($mime, \$tmp)) {
-                        return Email::MIME->new(\$tmp);
+                        return PublicInbox::MIME->new(\$tmp);
                 }
                 warn $mime->header('Message-ID')." failed spam check\n";
                 undef;
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index 33bfce27..a5ba5b2b 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -5,7 +5,7 @@
 package PublicInbox::WwwAttach; # internal package
 use strict;
 use warnings;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType qw(parse_content_type);
 $Email::MIME::ContentType::STRICT_PARAMS = 0;
 use PublicInbox::MsgIter;
@@ -15,7 +15,7 @@ sub get_attach ($$$) {
         my ($ctx, $idx, $fn) = @_;
         my $res = [ 404, [ 'Content-Type', 'text/plain' ], [ "Not found\n" ] ];
         my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res;
-        $mime = Email::MIME->new($mime);
+        $mime = PublicInbox::MIME->new($mime);
         msg_iter($mime, sub {
                 my ($part, $depth, @idx) = @{$_[0]};
                 return if join('.', @idx) ne $idx;
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 396ab489..38c83243 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -10,32 +10,35 @@ use warnings;
 use PublicInbox::Config;
 use PublicInbox::Git;
 use PublicInbox::Import;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
 use PublicInbox::Address;
 use PublicInbox::Spamcheck::Spamc;
 my $train = shift or die "usage: $usage\n";
-if ($train !~ /\A(?:ham|spam)\z/) {
+if ($train !~ /\A(?:ham|spam|rm)\z/) {
         die "`$train' not recognized.\nusage: $usage\n";
 }
 
 my $spamc = PublicInbox::Spamcheck::Spamc->new;
 my $pi_config = PublicInbox::Config->new;
 my $err;
-my $mime = Email::MIME->new(eval {
+my $mime = PublicInbox::MIME->new(eval {
         local $/;
         my $data = scalar <STDIN>;
         $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
-        eval {
-                if ($train eq 'ham') {
-                        $spamc->hamlearn(\$data);
-                } else {
-                        $spamc->spamlearn(\$data);
-                }
-                die "spamc failed with: $?\n" if $?;
-        };
-        $err = $@;
+
+        if ($train ne 'rm') {
+                eval {
+                        if ($train eq 'ham') {
+                                $spamc->hamlearn(\$data);
+                        } elsif ($train eq 'spam') {
+                                $spamc->spamlearn(\$data);
+                        }
+                        die "spamc failed with: $?\n" if $?;
+                };
+                $err = $@;
+        }
         $data
 });
 
@@ -61,7 +64,7 @@ foreach my $recipient (keys %dests) {
         my $email = $ENV{GIT_COMMITTER_EMAIL} || $recipient;
         my $im = PublicInbox::Import->new($git, $name, $email);
 
-        if ($train eq "spam") {
+        if ($train eq "spam" || $train eq "rm") {
                 # This needs to be idempotent, as my inotify trainer
                 # may train for each cross-posted message, and this
                 # script already learns for every list in
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 8b5258f5..60f817dc 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -52,7 +52,7 @@ $simple = undef;
 $emm = PublicInbox::Emergency->new($emergency);
 $emm->prepare(\$str);
 $ems = $ems->abort;
-my $mime = Email::MIME->new(\$str);
+my $mime = PublicInbox::MIME->new(\$str);
 $str = '';
 do_exit(0) unless $spam_ok;
 
diff --git a/t/filter_subjecttag.t b/t/filter_subjecttag.t
new file mode 100644
index 00000000..54a219e7
--- /dev/null
+++ b/t/filter_subjecttag.t
@@ -0,0 +1,27 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use_ok 'PublicInbox::Filter::SubjectTag';
+
+my $f = eval { PublicInbox::Filter::SubjectTag->new };
+like($@, qr/tag not defined/, 'error without args');
+$f = PublicInbox::Filter::SubjectTag->new('-tag', '[foo]');
+is(ref $f, 'PublicInbox::Filter::SubjectTag', 'new object created');
+
+my $mime = Email::MIME->new(<<EOF);
+To: you <you\@example.com>
+Subject: =?UTF-8?B?UmU6IFtmb29dIEVsw4PCqWFub3I=?=
+
+EOF
+
+$mime = $f->delivery($mime);
+is($mime->header('Subject'), "Re: El\xc3\xa9anor", 'filtered with Re:');
+
+$mime->header_str_set('Subject', '[FOO] bar');
+$mime = $f->delivery($mime);
+is($mime->header('Subject'), 'bar', 'filtered non-reply');
+
+done_testing();
diff --git a/t/import.t b/t/import.t
index 73f92adb..29d00b05 100644
--- a/t/import.t
+++ b/t/import.t
@@ -3,7 +3,7 @@
 use strict;
 use warnings;
 use Test::More;
-use Email::MIME;
+use PublicInbox::MIME;
 use PublicInbox::Git;
 use PublicInbox::Import;
 use File::Temp qw/tempdir/;
@@ -13,7 +13,7 @@ is(system(qw(git init -q --bare), $dir), 0, 'git init successful');
 my $git = PublicInbox::Git->new($dir);
 
 my $im = PublicInbox::Import->new($git, 'testbox', 'test@example');
-my $mime = Email::MIME->create(
+my $mime = PublicInbox::MIME->create(
         header => [
                 From => 'a@example.com',
                 To => 'b@example.com',
@@ -50,7 +50,7 @@ $im->done;
 is(scalar @revs, 26, '26 revisions exist after mass import');
 my ($mark, $msg) = $im->remove($mime);
 like($mark, qr/\A:\d+\z/, 'got mark');
-is(ref($msg), 'Email::MIME', 'got old message deleted');
+is(ref($msg), 'PublicInbox::MIME', 'got old message deleted');
 
 is(undef, $im->remove($mime), 'remove is idempotent');
 
diff --git a/t/mime.t b/t/mime.t
new file mode 100644
index 00000000..c4bdcf0d
--- /dev/null
+++ b/t/mime.t
@@ -0,0 +1,117 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# This library is free software; you can redistribute it and/or modify
+# it under the same terms as Perl itself.
+# Artistic or GPL-1+ <https://www.gnu.org/licenses/gpl-1.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use_ok 'PublicInbox::MIME';
+use PublicInbox::MsgIter;
+
+my $msg = PublicInbox::MIME->new(
+'From:   Richard Hansen <hansenr@google.com>
+To:     git@vger.kernel.org
+Cc:     Richard Hansen <hansenr@google.com>
+Subject: [PATCH 0/2] minor diff orderfile documentation improvements
+Date:   Mon,  9 Jan 2017 19:40:29 -0500
+Message-Id: <20170110004031.57985-1-hansenr@google.com>
+X-Mailer: git-send-email 2.11.0.390.gc69c2f50cf-goog
+Content-Type: multipart/signed; protocol="application/pkcs7-signature"; micalg=sha-256;
+        boundary="94eb2c0bc864b76ba30545b2bca9"
+
+--94eb2c0bc864b76ba30545b2bca9
+
+Richard Hansen (2):
+  diff: document behavior of relative diff.orderFile
+  diff: document the pattern format for diff.orderFile
+
+ Documentation/diff-config.txt  | 5 ++++-
+ Documentation/diff-options.txt | 3 ++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+
+--94eb2c0bc864b76ba30545b2bca9
+Content-Type: application/pkcs7-signature; name="smime.p7s"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="smime.p7s"
+Content-Description: (truncated) S/MIME Cryptographic Signature
+
+dkTlB69771K2eXK4LcHSH/2LqX+VYa3K44vrx1ruzjXdNWzIpKBy0weFNiwnJCGofvCysM2RCSI1
+--94eb2c0bc864b76ba30545b2bca9--
+
+');
+
+my @parts = $msg->parts;
+my $exp = 'Richard Hansen (2):
+  diff: document behavior of relative diff.orderFile
+  diff: document the pattern format for diff.orderFile
+
+ Documentation/diff-config.txt  | 5 ++++-
+ Documentation/diff-options.txt | 3 ++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+';
+
+ok($msg->isa('Email::MIME'), 'compatible with Email::MIME');
+is($parts[0]->body, $exp, 'body matches expected');
+
+
+my $raw = q^Date:   Wed, 18 Jan 2017 13:28:32 -0500
+From:   Santiago Torres <santiago@nyu.edu>
+To:     Junio C Hamano <gitster@pobox.com>
+Cc:     git@vger.kernel.org, peff@peff.net, sunshine@sunshineco.com,
+        walters@verbum.org, Lukas Puehringer <luk.puehringer@gmail.com>
+Subject: Re: [PATCH v6 4/6] builtin/tag: add --format argument for tag -v
+Message-ID: <20170118182831.pkhqu2np3bh2puei@LykOS.localdomain>
+References: <20170117233723.23897-1-santiago@nyu.edu>
+ <20170117233723.23897-5-santiago@nyu.edu>
+ <xmqqmvepb4oj.fsf@gitster.mtv.corp.google.com>
+ <xmqqh94wb4y0.fsf@gitster.mtv.corp.google.com>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha256;
+        protocol="application/pgp-signature"; boundary="r24xguofrazenjwe"
+Content-Disposition: inline
+In-Reply-To: <xmqqh94wb4y0.fsf@gitster.mtv.corp.google.com>
+
+
+--r24xguofrazenjwe
+Content-Type: text/plain; charset=us-ascii
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+your tree directly?=20
+
+--r24xguofrazenjwe
+Content-Type: application/pgp-signature; name="signature.asc"
+
+-----BEGIN PGP SIGNATURE-----
+
+=7wIb
+-----END PGP SIGNATURE-----
+
+--r24xguofrazenjwe--
+
+^;
+
+$msg = PublicInbox::MIME->new($raw);
+my $nr = 0;
+msg_iter($msg, sub {
+        my ($part, $level, @ex) = @{$_[0]};
+        if ($ex[0] == 1) {
+                is($part->body_str, "your tree directly? \r\n", 'body OK');
+        } elsif ($ex[0] == 2) {
+                is($part->body, "-----BEGIN PGP SIGNATURE-----\n\n" .
+                                "=7wIb\n" .
+                                "-----END PGP SIGNATURE-----\n",
+                        'sig "matches"');
+        } else {
+                fail "unexpected part\n";
+        }
+        $nr++;
+});
+
+is($nr, 2, 'got 2 parts');
+is($msg->as_string, $raw,
+        'stringified sufficiently close to original');
+
+done_testing();