From 6e97491f3c4a9787622618b4409659af67695cf5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 11 Jan 2015 04:28:46 +0000 Subject: scripts/import_slrnspool: new incremental importer This allows incremental imports of slrn spools, ideal for tracking lists via gmane. --- scripts/import_slrnspool | 69 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100755 scripts/import_slrnspool (limited to 'scripts') diff --git a/scripts/import_slrnspool b/scripts/import_slrnspool new file mode 100755 index 00000000..560c08c2 --- /dev/null +++ b/scripts/import_slrnspool @@ -0,0 +1,69 @@ +#!/usr/bin/perl -w +# Copyright (C) 2015, all contributors +# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# +# Incremental (or one-shot) importer of a slrnpull news spool +=begin usage + export ORIGINAL_RECIPIENT=address@example.com + public-inbox-init $LISTNAME $GIT_DIR $HTTP_URL $ORIGINAL_RECIPIENT + ./import_slrnspool SLRNPULL_ROOT/news/foo/bar +=cut +use strict; +use warnings; +use PublicInbox::Config; +use Email::Filter; +use Email::LocalDelivery; +sub usage { "Usage:\n".join('',grep(/\t/, `head -n 10 $0`)) } +my $spool = shift @ARGV or die usage(); +my $recipient = $ENV{ORIGINAL_RECIPIENT}; +defined $recipient or die usage(); +my @mda = qw(public-inbox-mda); +my $config = PublicInbox::Config->new; +my $cfg = $config->lookup($recipient); +defined $cfg or exit(1); +use Data::Dumper; print STDERR Dumper($cfg); + +sub get_min { + my ($cfg) = @_; + $cfg->{importslrnspoolstate} || 1 +} + +sub set_min { + my ($cfg, $num) = @_; + my $f = PublicInbox::Config->default_file; + my @cmd = (qw/git config/, "--file=$f", + "publicinbox.$cfg->{listname}.importslrnspoolstate", $num); + system(@cmd) == 0 or die join(' ', @cmd). " failed: $?\n"; +} + +my $n = get_min(); +my $ok; +my $max_gap = 10000; +my $max = $n + $max_gap; + +for (; $n < $max; $n++) { + my $fn = "$spool/$n"; + print STDERR $fn, "\n"; + open(my $fh, '<', $fn) or next; + $max = $n + $max_gap; + my $f = Email::Filter->new(data => eval { local $/; <$fh> }); + my $s = $f->simple; + + # gmane rewrites Received headers, which increases spamminess + my @h = $s->header('Original-Received'); + if (@h) { + $s->header_set('Received', @h); + $s->header_set('Original-Received'); + } + + # triggers for the SA HEADER_SPAM rule + foreach my $drop (qw(Approved)) { $s->header_set($drop) } + + # appears to be an old gmane bug: + $s->header_set('connect()'); + + $f->exit(0); + $f->pipe(@mda); + $ok = $n + 1; + set_min($cfg, $ok); +} -- cgit v1.2.3-24-ge0c7