#!/usr/bin/perl -w
# Copyright (C) 2016 all contributors
# License: AGPL-3.0+
#
# Provenance: scripts/import_vger_from_mbox as added by commit
# dcba4bf8cc193e22210c91d43fe5eab091bffe06 (Eric Wong, 2016-07-28),
# "add script used for importing git from download.gmane.org".
#
# Reads an mbox from STDIN, splits it into individual messages on
# "From " separator lines, passes each message through the Vger filter
# and adds it to the git repository found from the current directory.
#
#   usage: import_vger_from_mbox NAME EMAIL <MBOX
#
# NAME and EMAIL are handed to PublicInbox::Import->new unchanged
# (the original comments give "git" and "git@vger.kernel.org" as examples).
use strict;
use warnings;
use Email::MIME;
$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
use PublicInbox::Git;
use PublicInbox::Import;
use PublicInbox::Filter::Vger;

my $usage = "usage: $0 NAME EMAIL <MBOX\n";
my $name = shift or die $usage; # git
my $email = shift or die $usage; # git@vger.kernel.org

# NOTE(review): the two statements defining $git_dir and $git were eaten
# by HTML extraction (everything between "<MBOX" and "->" was dropped).
# What survives is "new($git_dir);" and the later use of $git; the
# rev-parse invocation below is a reconstruction -- confirm against the
# upstream commit.
chomp(my $git_dir = `git rev-parse --git-dir`);
die "unable to determine git directory (git rev-parse --git-dir failed)\n"
    if $? != 0 || $git_dir eq '';
my $git = PublicInbox::Git->new($git_dir);

my $im = PublicInbox::Import->new($git, $name, $email);
my $vger = PublicInbox::Filter::Vger->new;

# Raw bytes only: messages are parsed by Email::MIME, not decoded here.
binmode STDIN;

# do_add($im, \$raw): import one raw message.
#   $im  - importer object; its ->add method is called with the message
#   $raw - reference to the raw message text (modified in place)
# Collapses a run of trailing newlines to a single one, parses the text
# with Email::MIME, runs it through the Vger filter's ->scrub, then adds
# it.  A false return from ->add is reported as a duplicate on STDERR.
sub do_add {
    my ($im, $raw) = @_;
    $$raw =~ s/(\r?\n)+\z/$1/s;
    my $mime = Email::MIME->new($$raw);
    $mime = $vger->scrub($mime);
    $im->add($mime) or
        warn "duplicate: ",
            $mime->header_obj->header_raw('Message-ID'), "\n";
}

# asctime: From example@example.com Fri Jun 23 02:56:55 2000
my $from_strict = qr/^From \S+ \S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/;

my $msg = '';   # text of the message currently being accumulated
my $prev;       # previous input line; undef before the first line
while (defined(my $line = <STDIN>)) {
    if ($line =~ $from_strict) {
        # A "From " line only separates messages when it is the very
        # first line of input or directly follows a blank line.
        if (!defined($prev) || $prev =~ /^\r?$/) {
            do_add($im, \$msg) if $msg ne '';
            $msg = '';
            $prev = $line;
            next;
        }
        # Looks like a separator but is not preceded by a blank line:
        # warn with the input line number and keep it as message text.
        warn "W[$.] $line\n";
    }
    $prev = $line;
    $msg .= $line;
}

# Flush the final message; no separator follows it.
do_add($im, \$msg) if $msg ne '';
$im->done;