diff options
author | Eric Wong <e@80x24.org> | 2017-02-09 00:43:02 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2017-02-09 00:43:02 +0000 |
commit | fb9ed5324ec7de9420956840ba9a6585b81e8231 (patch) | |
tree | ab6b34f8459a585205a25d1f3d18d31437b89052 | |
parent | 1ab8dabe04ebba61fd8761dca3d569947cbe20be (diff) | |
parent | ba4c50c20b95679580beba1ef290a4281d5285b7 (diff) | |
download | public-inbox-fb9ed5324ec7de9420956840ba9a6585b81e8231.tar.gz |
* origin/master:
  config: do not slurp lines into memory
  TODO: several updates
  search: schema version bump for empty References/In-Reply-To
  Revert "searchidx: reindex clobbers old thread IDs"
  searchidx: reindex clobbers old thread IDs
  searchidx: deal with empty In-Reply-To and References headers
  searchview: increase limit for displaying search results
  searchview: clarify numeric summary at bottom
  add filter for Subject: tags
  watchmaildir: allow arguments for filters
  watchmaildir: limit live importer processes
  learn: implement "rm" only functionality
  mime: avoid SUPER usage in Email::MIME subclass
  inbox: reinstate periodic cleanup of Xapian and SQLite objects
  introduce PublicInbox::MIME wrapper class
-rw-r--r-- | MANIFEST | 4 | ||||
-rw-r--r-- | TODO | 15 | ||||
-rw-r--r-- | lib/PublicInbox/Config.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/Feed.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/Filter/SubjectTag.pm | 33 | ||||
-rw-r--r-- | lib/PublicInbox/Filter/Vger.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/Import.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/Inbox.pm | 22 | ||||
-rw-r--r-- | lib/PublicInbox/MIME.pm | 102 | ||||
-rw-r--r-- | lib/PublicInbox/MsgIter.pm | 22 | ||||
-rw-r--r-- | lib/PublicInbox/Search.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 14 | ||||
-rw-r--r-- | lib/PublicInbox/SearchView.pm | 20 | ||||
-rw-r--r-- | lib/PublicInbox/View.pm | 8 | ||||
-rw-r--r-- | lib/PublicInbox/WWW.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/WatchMaildir.pm | 23 | ||||
-rw-r--r-- | lib/PublicInbox/WwwAttach.pm | 4 | ||||
-rwxr-xr-x | script/public-inbox-learn | 29 | ||||
-rwxr-xr-x | script/public-inbox-mda | 2 | ||||
-rw-r--r-- | t/filter_subjecttag.t | 27 | ||||
-rw-r--r-- | t/import.t | 6 | ||||
-rw-r--r-- | t/mime.t | 117 |
22 files changed, 395 insertions, 77 deletions
@@ -52,6 +52,7 @@ lib/PublicInbox/ExtMsg.pm lib/PublicInbox/Feed.pm lib/PublicInbox/Filter/Base.pm lib/PublicInbox/Filter/Mirror.pm +lib/PublicInbox/Filter/SubjectTag.pm lib/PublicInbox/Filter/Vger.pm lib/PublicInbox/GetlineBody.pm lib/PublicInbox/Git.pm @@ -69,6 +70,7 @@ lib/PublicInbox/Linkify.pm lib/PublicInbox/Listener.pm lib/PublicInbox/MDA.pm lib/PublicInbox/MID.pm +lib/PublicInbox/MIME.pm lib/PublicInbox/Mbox.pm lib/PublicInbox/MsgIter.pm lib/PublicInbox/Msgmap.pm @@ -152,6 +154,7 @@ t/fail-bin/spamc t/feed.t t/filter_base.t t/filter_mirror.t +t/filter_subjecttag.t t/filter_vger.t t/git-http-backend.psgi t/git-http-backend.t @@ -172,6 +175,7 @@ t/linkify.t t/main-bin/spamc t/mda.t t/mid.t +t/mime.t t/msg_iter.t t/msgmap.t t/nntp.t @@ -11,15 +11,28 @@ all need to be considered for everything we introduce) * POP3 server, since some webmail providers support external POP3: https://public-inbox.org/meta/20160411034104.GA7817@dcvr.yhbt.net/ + Perhaps make this depend solely the NNTP server and work as a proxy. + Meaning users can run this without needing a full copy of the + archives in a git repository. + +* HTTP and NNTP proxy support. Allow us to be a frontend for + firewalled off (or Tor-exclusive) instances. The use case is + for offering a publically accessible IP with a cheap VPS, + yet storing large amounts of data on computers without a + public IP behind a home Internet connection. * TLS support for various daemons (including STARTTLS for NNTP and POP3) +* NNTP COMPRESS extension (see innd) + * Combined "super server" for NNTP/HTTP/POP3 to reduce memory overhead * Optional reply-to-list support for mirroring lists that want it :< Reply-to-list encourages the existing list as a single-point-of-failure, but having an extra mirror using public-inbox code is nice regardless. +* Optional reply-to-nobody for dead lists. 
+ * Configurable linkification for per-inbox shorthands: "$gmane/123456" could be configured to expand to the appropriate link pointing to the gmane.org list archives, @@ -36,7 +49,7 @@ all need to be considered for everything we introduce) * configurable constants (index limits, search results) -* handle messages with multiple Message-IDs +* handle messages with multiple Message-IDs (how?) * handle broken double-bracketed References properly (maybe) and totally broken Message-IDs diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 28b5bdb5..f6275cdd 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -111,7 +111,7 @@ sub git_config_dump { my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n"; my %rv; local $/ = "\n"; - foreach my $line (<$fh>) { + while (defined(my $line = <$fh>)) { chomp $line; my ($k, $v) = split(/=/, $line, 2); my $cur = $rv{$k}; diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 2a33fd29..e5d57550 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -5,7 +5,7 @@ package PublicInbox::Feed; use strict; use warnings; -use Email::MIME; +use PublicInbox::MIME; use PublicInbox::View; use PublicInbox::WwwAtomStream; @@ -39,7 +39,7 @@ sub generate_thread_atom { PublicInbox::WwwAtomStream->response($ctx, 200, sub { while (my $msg = shift @$msgs) { $msg = $ibx->msg_by_smsg($msg) and - return Email::MIME->new($msg); + return PublicInbox::MIME->new($msg); } }); } @@ -175,7 +175,7 @@ sub each_recent_blob { sub do_cat_mail { my ($ibx, $path) = @_; my $mime = eval { $ibx->msg_by_path($path) } or return; - Email::MIME->new($mime); + PublicInbox::MIME->new($mime); } 1; diff --git a/lib/PublicInbox/Filter/SubjectTag.pm b/lib/PublicInbox/Filter/SubjectTag.pm new file mode 100644 index 00000000..1d281425 --- /dev/null +++ b/lib/PublicInbox/Filter/SubjectTag.pm @@ -0,0 +1,33 @@ +# Copyright (C) 2017 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ 
<https://www.gnu.org/licenses/agpl-3.0.txt> + +# Filter for various [tags] in subjects +package PublicInbox::Filter::SubjectTag; +use strict; +use warnings; +use base qw(PublicInbox::Filter::Base); + +sub new { + my ($class, %opts) = @_; + my $tag = delete $opts{-tag}; + die "tag not defined!\n" unless defined $tag && $tag ne ''; + my $self = $class->SUPER::new(%opts); + $self->{tag_re} = qr/\A\s*(re:\s+|)\Q$tag\E\s*/i; + $self; +} + +sub scrub { + my ($self, $mime) = @_; + my $subj = $mime->header('Subject'); + $subj =~ s/$self->{tag_re}/$1/; # $1 is "Re: " + $mime->header_str_set('Subject', $subj); + $self->ACCEPT($mime); +} + +# no suffix/article rejection for mirrors +sub delivery { + my ($self, $mime) = @_; + $self->scrub($mime); +} + +1; diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm index 2ffed184..905f28d7 100644 --- a/lib/PublicInbox/Filter/Vger.pm +++ b/lib/PublicInbox/Filter/Vger.pm @@ -25,7 +25,7 @@ sub scrub { # so in multipart (e.g. GPG-signed) messages, the list trailer # becomes invisible to MIME-aware email clients. 
if ($s =~ s/$l0\n$l1\n$l2\n$l3\n($l4\n)?\z//os) { - $mime = Email::MIME->new(\$s); + $mime = PublicInbox::MIME->new(\$s); } $self->ACCEPT($mime); } diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 1ac112b8..13671a4f 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -119,7 +119,7 @@ sub remove { $n = read($r, my $lf, 1); defined($n) or die "read final byte of cat-blob failed: $!"; die "bad read on final byte: <$lf>" if $lf ne "\n"; - my $cur = Email::MIME->new($buf); + my $cur = PublicInbox::MIME->new($buf); my $cur_s = $cur->header('Subject'); $cur_s = '' unless defined $cur_s; my $cur_m = $mime->header('Subject'); diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 1a844e1c..999f813b 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -7,6 +7,7 @@ use strict; use warnings; use PublicInbox::Git; use PublicInbox::MID qw(mid2path); +use Devel::Peek qw(SvREFCNT); my $cleanup_timer; eval { @@ -18,10 +19,20 @@ eval { my $CLEANUP = {}; # string(inbox) -> inbox sub cleanup_task () { $cleanup_timer = undef; - delete $_->{git} for values %$CLEANUP; + for my $ibx (values %$CLEANUP) { + foreach my $f (qw(git mm search)) { + delete $ibx->{$f} if SvREFCNT($ibx->{$f}) == 1; + } + } $CLEANUP = {}; } +sub _cleanup_later ($) { + my ($self) = @_; + $cleanup_timer ||= PublicInbox::EvCleanup::later(*cleanup_task); + $CLEANUP->{"$self"} = $self; +} + sub _set_uint ($$$) { my ($opts, $field, $default) = @_; my $val = $opts->{$field}; @@ -70,20 +81,23 @@ sub git { $self->{git} ||= eval { my $g = PublicInbox::Git->new($self->{mainrepo}); $g->{-httpbackend_limiter} = $self->{-httpbackend_limiter}; - $cleanup_timer ||= PublicInbox::EvCleanup::later(*cleanup_task); - $CLEANUP->{"$self"} = $self; + _cleanup_later($self); $g; }; } sub mm { my ($self) = @_; - $self->{mm} ||= eval { PublicInbox::Msgmap->new($self->{mainrepo}) }; + $self->{mm} ||= eval { + _cleanup_later($self); + 
PublicInbox::Msgmap->new($self->{mainrepo}); + }; } sub search { my ($self) = @_; $self->{search} ||= eval { + _cleanup_later($self); PublicInbox::Search->new($self->{mainrepo}, $self->{altid}); }; } diff --git a/lib/PublicInbox/MIME.pm b/lib/PublicInbox/MIME.pm new file mode 100644 index 00000000..54925a85 --- /dev/null +++ b/lib/PublicInbox/MIME.pm @@ -0,0 +1,102 @@ +# This library is free software; you can redistribute it and/or modify +# it under the same terms as Perl itself. +# +# The license for this file differs from the rest of public-inbox. +# +# It monkey patches the "parts_multipart" subroutine with patches +# from Matthew Horsfall <wolfsage@gmail.com> at: +# +# git clone --mirror https://github.com/rjbs/Email-MIME.git refs/pull/28/head +# +# commit fe0eb870ab732507aa39a1070a2fd9435c7e4877 +# ("Make sure we don't modify the body of a message when injecting a header.") +# commit 981d8201a7239b02114489529fd366c4c576a146 +# ("GH #14 - Handle CRLF emails properly.") +# commit 2338d93598b5e8432df24bda8dfdc231bdeb666e +# ("GH #14 - Support multipart messages without content-type in subparts.") +# +# For Email::MIME >= 1.923 && < 1.935, +# commit dcef9be66c49ae89c7a5027a789bbbac544499ce +# ("removing all trailing newlines was too much") +# is also included +package PublicInbox::MIME; +use strict; +use warnings; +use base qw(Email::MIME); + +if ($Email::MIME::VERSION <= 1.937) { +sub parts_multipart { + my $self = shift; + my $boundary = $self->{ct}->{attributes}->{boundary}; + + # Take a message, join all its lines together. Now try to Email::MIME->new + # it with 1.861 or earlier. Death! It tries to recurse endlessly on the + # body, because every time it splits on boundary it gets itself. Obviously + # that means it's a bogus message, but a mangled result (or exception) is + # better than endless recursion. 
-- rjbs, 2008-01-07 + return $self->parts_single_part + unless $boundary and $self->body_raw =~ /^--\Q$boundary\E\s*$/sm; + + $self->{body_raw} = Email::Simple::body($self); + + # rfc1521 7.2.1 + my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2; + + # Split on boundaries, but keep blank lines after them intact + my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || ''); + + Email::Simple::body_set($self, undef); + + # If there are no headers in the potential MIME part, it's just part of the + # body. This is a horrible hack, although it's debatable whether it was + # better or worse when it was $self->{body} = shift @bits ... -- rjbs, + # 2006-11-27 + Email::Simple::body_set($self, shift @bits) if ($bits[0] || '') !~ /.*:.*/; + + my $bits = @bits; + + my @parts; + for my $bit (@bits) { + # Parts don't need headers. If they don't have them, they look like this: + # + # --90e6ba6e8d06f1723604fc1b809a + # + # Part 2 + # + # Part 2a + # + # $bit will contain two new lines before Part 2. + # + # Anything with headers will only have one new line. + # + # RFC 1341 Section 7.2 says parts without headers are to be considered + # plain US-ASCII text. -- alh + # 2016-08-01 + my $added_header; + + if ($bit =~ /^(?:$self->{mycrlf}){2}/) { + $bit = "Content-type: text/plain; charset=us-ascii" . 
$bit; + + $added_header = 1; + } + + $bit =~ s/\A[\n\r]+//smg; + $bit =~ s/(?<!\x0d)$self->{mycrlf}\Z//sm; + + my $email = (ref $self)->new($bit); + + if ($added_header) { + # Remove our changes so we don't change the raw email content + $email->header_str_set('Content-Type'); + } + + push @parts, $email; + } + + $self->{parts} = \@parts; + + return @{ $self->{parts} }; +} +} + +1; diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm index ef0d209f..5be06a44 100644 --- a/lib/PublicInbox/MsgIter.pm +++ b/lib/PublicInbox/MsgIter.pm @@ -6,17 +6,7 @@ use strict; use warnings; use base qw(Exporter); our @EXPORT = qw(msg_iter); -use Email::MIME; -use Scalar::Util qw(readonly); - -# Workaround Email::MIME versions without -# commit dcef9be66c49ae89c7a5027a789bbbac544499ce -# ("removing all trailing newlines was too much") -# This is necessary for Debian jessie -my $bad = 1.923; -my $good = 1.935; -my $ver = $Email::MIME::VERSION; -my $extra_nl = 1 if ($ver >= $bad && $ver < $good); +use PublicInbox::MIME; # Like Email::MIME::walk_parts, but this is: # * non-recursive @@ -36,16 +26,6 @@ sub msg_iter ($$) { @sub = map { [ $_, $depth, @idx, ++$i ] } @sub; @parts = (@sub, @parts); } else { - if ($extra_nl) { - my $lf = $part->{mycrlf}; - my $bref = $part->{body}; - if (readonly($$bref)) { - my $s = $$bref . 
$lf; - $part->{body} = \$s; - } else { - $$bref .= $lf; - } - } $cb->($p); } } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index a1bae419..8c72fa17 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -16,7 +16,7 @@ use constant YYYYMMDD => 4; # for searching in the WWW UI use Search::Xapian qw/:standard/; use PublicInbox::SearchMsg; -use Email::MIME; +use PublicInbox::MIME; use PublicInbox::MID qw/mid_clean id_compress/; # This is English-only, everything else is non-standard and may be confused as @@ -39,7 +39,9 @@ use constant { # 10 - optimize doc for NNTP overviews # 11 - merge threads when vivifying ghosts # 12 - change YYYYMMDD value column to numeric - SCHEMA_VERSION => 12, + # 13 - fix threading for empty References/In-Reply-To + # (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0) + SCHEMA_VERSION => 13, # n.b. FLAG_PURE_NOT is expensive not suitable for a public website # as it could become a denial-of-service vector diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 5adc17d3..c0ea3c1e 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -10,7 +10,7 @@ package PublicInbox::SearchIdx; use strict; use warnings; use Fcntl qw(:flock :DEFAULT); -use Email::MIME; +use PublicInbox::MIME; use Email::MIME::ContentType; $Email::MIME::ContentType::STRICT_PARAMS = 0; use base qw(PublicInbox::Search); @@ -285,11 +285,15 @@ sub link_message { my $mime = $smsg->{mime}; my $hdr = $mime->header_obj; my $refs = $hdr->header_raw('References'); - my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : (); + my @refs = defined $refs ? 
($refs =~ /<([^>]+)>/g) : (); my $irt = $hdr->header_raw('In-Reply-To'); if (defined $irt) { - $irt = mid_clean($irt); - $irt = undef if $mid eq $irt; + if ($irt eq '') { + $irt = undef; + } else { + $irt = mid_clean($irt); + $irt = undef if $mid eq $irt; + } } my $tid; @@ -393,7 +397,7 @@ sub do_cat_mail { my $str = $git->cat_file($blob, $sizeref); # fixup bugs from import: $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; - Email::MIME->new($str); + PublicInbox::MIME->new($str); }; $@ ? undef : $mime; } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index bd634d8d..f1c4b6a0 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -10,10 +10,10 @@ use PublicInbox::Hval qw/ascii_html/; use PublicInbox::View; use PublicInbox::WwwAtomStream; use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape); -use Email::MIME; +use PublicInbox::MIME; require PublicInbox::Git; require PublicInbox::SearchThread; -our $LIM = 50; +our $LIM = 200; sub noop {} @@ -145,15 +145,21 @@ sub search_nav_bot { my $o = $q->{o}; my $end = $o + $nr; my $beg = $o + 1; - my $rv = "</pre><hr><pre>Results $beg-$end of $total"; + my $rv = '</pre><hr><pre>'; + if ($beg <= $end) { + $rv .= "Results $beg-$end of $total"; + $rv .= ' (estimated)' if $end != $total; + } else { + $rv .= "No more results, only $total"; + } my $n = $o + $LIM; if ($n < $total) { my $qs = $q->qs_html(o => $n); - $rv .= qq{, <a\nhref="?$qs"\nrel=next>next</a>} + $rv .= qq{ <a\nhref="?$qs"\nrel=next>next</a>} } if ($o > 0) { - $rv .= $n < $total ? '/' : ', '; + $rv .= $n < $total ? '/' : ' '; my $p = $o - $LIM; my $qs = $q->qs_html(o => ($p > 0 ? 
$p : 0)); $rv .= qq{<a\nhref="?$qs"\nrel=prev>prev</a>}; @@ -205,7 +211,7 @@ sub mset_thread { $mime = $inbox->msg_by_smsg($mime) and last; } if ($mime) { - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); return PublicInbox::View::index_entry($mime, $ctx, scalar @$msgs); } @@ -239,7 +245,7 @@ sub adump { while (my $x = shift @items) { $x = load_doc_retry($srch, $x); $x = $ibx->msg_by_smsg($x) and - return Email::MIME->new($x); + return PublicInbox::MIME->new($x); } return undef; }); diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index e4e9d7d2..2c37cd42 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -299,7 +299,7 @@ sub stream_thread ($$) { } return missing_thread($ctx) unless $mime; - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); $ctx->{-title_html} = ascii_html($mime->header('Subject')); $ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime); PublicInbox::WwwStream->response($ctx, 200, sub { @@ -311,7 +311,7 @@ sub stream_thread ($$) { unshift @q, map { ($cl, $_) } @{$node->{children}}; my $mid = $node->{id}; if ($mime = $inbox->msg_by_smsg($node->{smsg})) { - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); return thread_index_entry($ctx, $level, $mime); } else { return ghost_index_entry($ctx, $level, $node); @@ -362,7 +362,7 @@ sub thread_html { $mime = $inbox->msg_by_smsg($mime) and last; } return missing_thread($ctx) unless $mime; - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); $ctx->{-title_html} = ascii_html($mime->header('Subject')); $ctx->{-html_tip} = '<pre>'.index_entry($mime, $ctx, scalar @$msgs); $mime = undef; @@ -372,7 +372,7 @@ sub thread_html { $mime = $inbox->msg_by_smsg($mime) and last; } if ($mime) { - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); return index_entry($mime, $ctx, scalar @$msgs); } $msgs = undef; diff --git a/lib/PublicInbox/WWW.pm 
b/lib/PublicInbox/WWW.pm index 11fc92e9..430e6b19 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -113,7 +113,7 @@ sub preload { require PublicInbox::Feed; require PublicInbox::View; require PublicInbox::SearchThread; - require Email::MIME; + require PublicInbox::MIME; require Digest::SHA; require POSIX; @@ -225,8 +225,8 @@ sub get_mid_html { my $x = mid2blob($ctx) or return r404($ctx); require PublicInbox::View; - require Email::MIME; - my $mime = Email::MIME->new($x); + require PublicInbox::MIME; + my $mime = PublicInbox::MIME->new($x); searcher($ctx); PublicInbox::View::msg_html($ctx, $mime); } diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index b7c2d17a..1823c248 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -6,7 +6,7 @@ package PublicInbox::WatchMaildir; use strict; use warnings; -use Email::MIME; +use PublicInbox::MIME; use Email::MIME::ContentType; $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect use PublicInbox::Git; @@ -207,7 +207,7 @@ sub _path_to_mime { local $/; my $str = <$fh>; $str or return; - return Email::MIME->new(\$str); + return PublicInbox::MIME->new(\$str); } elsif ($!{ENOENT}) { return; } else { @@ -224,18 +224,31 @@ sub _importer_for { my $addr = $inbox->{-primary_address}; PublicInbox::Import->new($git, $name, $addr, $inbox); }; - $self->{importers}->{"$im"} = $im; + + my $importers = $self->{importers}; + if (scalar(keys(%$importers)) > 2) { + delete $importers->{"$im"}; + _done_for_now($self); + } + + $importers->{"$im"} = $im; } sub _scrubber_for { my ($inbox) = @_; my $f = $inbox->{filter}; if ($f && $f =~ /::/) { + my @args; + # basic line splitting, only + # Perhaps we can have proper quote splitting one day... 
+ ($f, @args) = split(/\s+/, $f) if $f =~ /\s+/; + eval "require $f"; if ($@) { warn $@; } else { - return $f->new; + # e.g: PublicInbox::Filter::Vger->new(@args) + return $f->new(@args); } } undef; @@ -247,7 +260,7 @@ sub _spamcheck_cb { my ($mime) = @_; my $tmp = ''; if ($sc->spamcheck($mime, \$tmp)) { - return Email::MIME->new(\$tmp); + return PublicInbox::MIME->new(\$tmp); } warn $mime->header('Message-ID')." failed spam check\n"; undef; diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm index 33bfce27..a5ba5b2b 100644 --- a/lib/PublicInbox/WwwAttach.pm +++ b/lib/PublicInbox/WwwAttach.pm @@ -5,7 +5,7 @@ package PublicInbox::WwwAttach; # internal package use strict; use warnings; -use Email::MIME; +use PublicInbox::MIME; use Email::MIME::ContentType qw(parse_content_type); $Email::MIME::ContentType::STRICT_PARAMS = 0; use PublicInbox::MsgIter; @@ -15,7 +15,7 @@ sub get_attach ($$$) { my ($ctx, $idx, $fn) = @_; my $res = [ 404, [ 'Content-Type', 'text/plain' ], [ "Not found\n" ] ]; my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res; - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); msg_iter($mime, sub { my ($part, $depth, @idx) = @{$_[0]}; return if join('.', @idx) ne $idx; diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 396ab489..38c83243 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -10,32 +10,35 @@ use warnings; use PublicInbox::Config; use PublicInbox::Git; use PublicInbox::Import; -use Email::MIME; +use PublicInbox::MIME; use Email::MIME::ContentType; $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect use PublicInbox::Address; use PublicInbox::Spamcheck::Spamc; my $train = shift or die "usage: $usage\n"; -if ($train !~ /\A(?:ham|spam)\z/) { +if ($train !~ /\A(?:ham|spam|rm)\z/) { die "`$train' not recognized.\nusage: $usage\n"; } my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_config = PublicInbox::Config->new; my 
$err; -my $mime = Email::MIME->new(eval { +my $mime = PublicInbox::MIME->new(eval { local $/; my $data = scalar <STDIN>; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; - eval { - if ($train eq 'ham') { - $spamc->hamlearn(\$data); - } else { - $spamc->spamlearn(\$data); - } - die "spamc failed with: $?\n" if $?; - }; - $err = $@; + + if ($train ne 'rm') { + eval { + if ($train eq 'ham') { + $spamc->hamlearn(\$data); + } elsif ($train eq 'spam') { + $spamc->spamlearn(\$data); + } + die "spamc failed with: $?\n" if $?; + }; + $err = $@; + } $data }); @@ -61,7 +64,7 @@ foreach my $recipient (keys %dests) { my $email = $ENV{GIT_COMMITTER_EMAIL} || $recipient; my $im = PublicInbox::Import->new($git, $name, $email); - if ($train eq "spam") { + if ($train eq "spam" || $train eq "rm") { # This needs to be idempotent, as my inotify trainer # may train for each cross-posted message, and this # script already learns for every list in diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 8b5258f5..60f817dc 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -52,7 +52,7 @@ $simple = undef; $emm = PublicInbox::Emergency->new($emergency); $emm->prepare(\$str); $ems = $ems->abort; -my $mime = Email::MIME->new(\$str); +my $mime = PublicInbox::MIME->new(\$str); $str = ''; do_exit(0) unless $spam_ok; diff --git a/t/filter_subjecttag.t b/t/filter_subjecttag.t new file mode 100644 index 00000000..54a219e7 --- /dev/null +++ b/t/filter_subjecttag.t @@ -0,0 +1,27 @@ +# Copyright (C) 2017 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use warnings; +use Test::More; +use Email::MIME; +use_ok 'PublicInbox::Filter::SubjectTag'; + +my $f = eval { PublicInbox::Filter::SubjectTag->new }; +like($@, qr/tag not defined/, 'error without args'); +$f = PublicInbox::Filter::SubjectTag->new('-tag', '[foo]'); +is(ref $f, 'PublicInbox::Filter::SubjectTag', 'new object created'); + +my $mime = 
Email::MIME->new(<<EOF); +To: you <you\@example.com> +Subject: =?UTF-8?B?UmU6IFtmb29dIEVsw4PCqWFub3I=?= + +EOF + +$mime = $f->delivery($mime); +is($mime->header('Subject'), "Re: El\xc3\xa9anor", 'filtered with Re:'); + +$mime->header_str_set('Subject', '[FOO] bar'); +$mime = $f->delivery($mime); +is($mime->header('Subject'), 'bar', 'filtered non-reply'); + +done_testing(); @@ -3,7 +3,7 @@ use strict; use warnings; use Test::More; -use Email::MIME; +use PublicInbox::MIME; use PublicInbox::Git; use PublicInbox::Import; use File::Temp qw/tempdir/; @@ -13,7 +13,7 @@ is(system(qw(git init -q --bare), $dir), 0, 'git init successful'); my $git = PublicInbox::Git->new($dir); my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); -my $mime = Email::MIME->create( +my $mime = PublicInbox::MIME->create( header => [ From => 'a@example.com', To => 'b@example.com', @@ -50,7 +50,7 @@ $im->done; is(scalar @revs, 26, '26 revisions exist after mass import'); my ($mark, $msg) = $im->remove($mime); like($mark, qr/\A:\d+\z/, 'got mark'); -is(ref($msg), 'Email::MIME', 'got old message deleted'); +is(ref($msg), 'PublicInbox::MIME', 'got old message deleted'); is(undef, $im->remove($mime), 'remove is idempotent'); diff --git a/t/mime.t b/t/mime.t new file mode 100644 index 00000000..c4bdcf0d --- /dev/null +++ b/t/mime.t @@ -0,0 +1,117 @@ +# Copyright (C) 2017 all contributors <meta@public-inbox.org> +# This library is free software; you can redistribute it and/or modify +# it under the same terms as Perl itself. 
+# Artistic or GPL-1+ <https://www.gnu.org/licenses/gpl-1.0.txt> +use strict; +use warnings; +use Test::More; +use_ok 'PublicInbox::MIME'; +use PublicInbox::MsgIter; + +my $msg = PublicInbox::MIME->new( +'From: Richard Hansen <hansenr@google.com> +To: git@vger.kernel.org +Cc: Richard Hansen <hansenr@google.com> +Subject: [PATCH 0/2] minor diff orderfile documentation improvements +Date: Mon, 9 Jan 2017 19:40:29 -0500 +Message-Id: <20170110004031.57985-1-hansenr@google.com> +X-Mailer: git-send-email 2.11.0.390.gc69c2f50cf-goog +Content-Type: multipart/signed; protocol="application/pkcs7-signature"; micalg=sha-256; + boundary="94eb2c0bc864b76ba30545b2bca9" + +--94eb2c0bc864b76ba30545b2bca9 + +Richard Hansen (2): + diff: document behavior of relative diff.orderFile + diff: document the pattern format for diff.orderFile + + Documentation/diff-config.txt | 5 ++++- + Documentation/diff-options.txt | 3 ++- + 2 files changed, 6 insertions(+), 2 deletions(-) + + +--94eb2c0bc864b76ba30545b2bca9 +Content-Type: application/pkcs7-signature; name="smime.p7s" +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; filename="smime.p7s" +Content-Description: (truncated) S/MIME Cryptographic Signature + +dkTlB69771K2eXK4LcHSH/2LqX+VYa3K44vrx1ruzjXdNWzIpKBy0weFNiwnJCGofvCysM2RCSI1 +--94eb2c0bc864b76ba30545b2bca9-- + +'); + +my @parts = $msg->parts; +my $exp = 'Richard Hansen (2): + diff: document behavior of relative diff.orderFile + diff: document the pattern format for diff.orderFile + + Documentation/diff-config.txt | 5 ++++- + Documentation/diff-options.txt | 3 ++- + 2 files changed, 6 insertions(+), 2 deletions(-) + +'; + +ok($msg->isa('Email::MIME'), 'compatible with Email::MIME'); +is($parts[0]->body, $exp, 'body matches expected'); + + +my $raw = q^Date: Wed, 18 Jan 2017 13:28:32 -0500 +From: Santiago Torres <santiago@nyu.edu> +To: Junio C Hamano <gitster@pobox.com> +Cc: git@vger.kernel.org, peff@peff.net, sunshine@sunshineco.com, + walters@verbum.org, Lukas 
Puehringer <luk.puehringer@gmail.com> +Subject: Re: [PATCH v6 4/6] builtin/tag: add --format argument for tag -v +Message-ID: <20170118182831.pkhqu2np3bh2puei@LykOS.localdomain> +References: <20170117233723.23897-1-santiago@nyu.edu> + <20170117233723.23897-5-santiago@nyu.edu> + <xmqqmvepb4oj.fsf@gitster.mtv.corp.google.com> + <xmqqh94wb4y0.fsf@gitster.mtv.corp.google.com> +MIME-Version: 1.0 +Content-Type: multipart/signed; micalg=pgp-sha256; + protocol="application/pgp-signature"; boundary="r24xguofrazenjwe" +Content-Disposition: inline +In-Reply-To: <xmqqh94wb4y0.fsf@gitster.mtv.corp.google.com> + + +--r24xguofrazenjwe +Content-Type: text/plain; charset=us-ascii +Content-Disposition: inline +Content-Transfer-Encoding: quoted-printable + +your tree directly?=20 + +--r24xguofrazenjwe +Content-Type: application/pgp-signature; name="signature.asc" + +-----BEGIN PGP SIGNATURE----- + +=7wIb +-----END PGP SIGNATURE----- + +--r24xguofrazenjwe-- + +^; + +$msg = PublicInbox::MIME->new($raw); +my $nr = 0; +msg_iter($msg, sub { + my ($part, $level, @ex) = @{$_[0]}; + if ($ex[0] == 1) { + is($part->body_str, "your tree directly? \r\n", 'body OK'); + } elsif ($ex[0] == 2) { + is($part->body, "-----BEGIN PGP SIGNATURE-----\n\n" . + "=7wIb\n" . + "-----END PGP SIGNATURE-----\n", + 'sig "matches"'); + } else { + fail "unexpected part\n"; + } + $nr++; +}); + +is($nr, 2, 'got 2 parts'); +is($msg->as_string, $raw, + 'stringified sufficiently close to original'); + +done_testing(); |