diff options
author | Eric Wong <e@80x24.org> | 2021-06-08 09:50:21 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-06-08 16:50:47 +0000 |
commit | 10b523eb017162240b1ac3647f8dcbbf2be348a7 (patch) | |
tree | 9ea63ea4c4919556a1bf5b335f365372dfa1c84a /lib/PublicInbox/LEI.pm | |
parent | ba34a69490dce6ea3ba85ee5416b6590fa0c0a39 (diff) | |
download | public-inbox-10b523eb017162240b1ac3647f8dcbbf2be348a7.tar.gz |
On a 4-core CPU, this speeds up "lei import" on a largish Maildir inbox with 75K messages from ~8 minutes down to ~40s. Parallelizing alone did not bring any improvement and may even have hurt performance slightly, depending on CPU availability; it is creating the index on the "fid" and "name" columns in blob2name that yields the speedup. Parallelizing IMAP makes more sense, since most IMAP stores are non-local and subject to network latency. Followup-to: bdecd7ed8e0dcf0b45491b947cd737ba8cfe38a3 ("lei import: speed up kw updates for old IMAP messages")
Diffstat (limited to 'lib/PublicInbox/LEI.pm')
-rw-r--r-- | lib/PublicInbox/LEI.pm | 11 |
1 file changed, 6 insertions, 5 deletions
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index ed01e8de..77fc5b8f 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -240,7 +240,7 @@ our %CMD = ( # sorted in order of importance/use: @c_opt ], 'import' => [ 'LOCATION...|--stdin', 'one-time import/update from URL or filesystem', - qw(stdin| offset=i recursive|r exclude=s include|I=s + qw(stdin| offset=i recursive|r exclude=s include|I=s jobs=s lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!), qw(no-torsocks torsocks=s), PublicInbox::LeiQuery::curl_opt(), @c_opt ], 'forget-mail-sync' => [ 'LOCATION...', @@ -421,7 +421,7 @@ my %CONFIG_KEYS = ( 'leistore.dir' => 'top-level storage location', ); -my @WQ_KEYS = qw(lxs l2m wq1 ikw); # internal workers +my @WQ_KEYS = qw(lxs l2m ikw pmd wq1); # internal workers sub _drop_wq { my ($self) = @_; @@ -566,7 +566,7 @@ sub pkt_op_pair { } sub workers_start { - my ($lei, $wq, $jobs, $ops) = @_; + my ($lei, $wq, $jobs, $ops, $flds) = @_; $ops = { '!' => [ \&fail_handler, $lei ], '|' => [ \&sigpipe_handler, $lei ], @@ -577,7 +577,8 @@ sub workers_start { $ops->{''} //= [ $wq->can('_lei_wq_eof') || \&wq_eof, $lei ]; my $end = $lei->pkt_op_pair; my $ident = $wq->{-wq_ident} // "lei-$lei->{cmd} worker"; - $wq->wq_workers_start($ident, $jobs, $lei->oldset, { lei => $lei }); + $flds->{lei} = $lei; + $wq->wq_workers_start($ident, $jobs, $lei->oldset, $flds); delete $lei->{pkt_op_p}; my $op_c = delete $lei->{pkt_op_c}; # {-lei_sock} persists script/lei process until ops->{''} EOF callback @@ -590,7 +591,7 @@ sub workers_start { # call this when we're ready to wait on events and yield to other clients sub wait_wq_events { my ($lei, $op_c, $ops) = @_; - for my $wq (grep(defined, @$lei{qw(ikw)})) { # auxiliary WQs + for my $wq (grep(defined, @$lei{qw(ikw pmd)})) { # auxiliary WQs $wq->wq_close(1); } $op_c->{ops} = $ops; |