diff options
author | Eric Wong <e@80x24.org> | 2021-06-08 09:50:21 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-06-08 16:50:47 +0000 |
commit | 10b523eb017162240b1ac3647f8dcbbf2be348a7 (patch) | |
tree | 9ea63ea4c4919556a1bf5b335f365372dfa1c84a /lib/PublicInbox/LeiInput.pm | |
parent | ba34a69490dce6ea3ba85ee5416b6590fa0c0a39 (diff) | |
download | public-inbox-10b523eb017162240b1ac3647f8dcbbf2be348a7.tar.gz |
On a 4-core CPU, this speeds up "lei import" on a largish Maildir inbox with 75K messages from ~8 minutes down to ~40s. Parallelizing alone did not bring any improvement and may even hurt performance slightly, depending on CPU availability. However, creating the index on the "fid" and "name" columns in blob2name yields us the same speedup we got. Parallelizing IMAP makes more sense because most IMAP stores are non-local and subject to network latency. Followup-to: bdecd7ed8e0dcf0b45491b947cd737ba8cfe38a3 ("lei import: speed up kw updates for old IMAP messages")
Diffstat (limited to 'lib/PublicInbox/LeiInput.pm')
-rw-r--r-- | lib/PublicInbox/LeiInput.pm | 31 |
1 files changed, 21 insertions, 10 deletions
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 4ff7a379..24211bf0 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -151,9 +151,16 @@ sub input_path_url { return $lei->fail(<<EOM) if $ifmt && $ifmt ne 'maildir'; $input appears to be a maildir, not $ifmt EOM - PublicInbox::MdirReader->new->maildir_each_eml($input, - $self->can('input_maildir_cb'), - $self, @args); + my $mdr = PublicInbox::MdirReader->new; + if (my $pmd = $self->{pmd}) { + $mdr->maildir_each_file($input, + $pmd->can('each_mdir_fn'), + $pmd, @args); + } else { + $mdr->maildir_each_eml($input, + $self->can('input_maildir_cb'), + $self, @args); + } } else { $lei->fail("$input unsupported (TODO)"); } @@ -215,7 +222,7 @@ sub prepare_inputs { # returns undef on error push @{$sync->{no}}, '/dev/stdin' if $sync; } my $net = $lei->{net}; # NetWriter may be created by l2m - my (@f, @d); + my (@f, @md); # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX for my $input (@$inputs) { my $input_path = $input; @@ -247,11 +254,11 @@ sub prepare_inputs { # returns undef on error PublicInbox::MboxReader->reads($ifmt) or return $lei->fail("$ifmt not supported"); } elsif (-d $input_path) { - require PublicInbox::MdirReader; $ifmt eq 'maildir' or return $lei->fail("$ifmt not supported"); $sync and $input = 'maildir:'. 
$lei->abs_path($input_path); + push @md, $input; } else { return $lei->fail("Unable to handle $input"); } @@ -266,21 +273,18 @@ $input is `eml', not --in-format=$in_fmt if ($devfd >= 0 || -f $input || -p _) { push @{$sync->{no}}, $input if $sync; push @f, $input; - } elsif (-d $input) { + } elsif (-d "$input/new" && -d "$input/cur") { if ($sync) { $input = $lei->abs_path($input); push @{$sync->{ok}}, $input; } - push @d, $input; + push @md, $input; } else { return $lei->fail("Unable to handle $input") } } } if (@f) { check_input_format($lei, \@f) or return } - if (@d) { # TODO: check for MH vs Maildir, here - require PublicInbox::MdirReader; - } if ($sync && $sync->{no}) { return $lei->fail(<<"") if !$sync->{ok}; --mail-sync specified but no inputs support it @@ -299,6 +303,13 @@ $input is `eml', not --in-format=$in_fmt $lei->{auth} //= PublicInbox::LeiAuth->new; $lei->{net} //= $net; } + if (scalar(@md)) { + require PublicInbox::MdirReader; + if ($self->can('pmdir_cb')) { + require PublicInbox::LeiPmdir; + $self->{pmd} = PublicInbox::LeiPmdir->new($lei, $self); + } + } $self->{inputs} = $inputs; } |