diff options
author | Eric Wong <e@80x24.org> | 2021-03-14 13:12:00 +0200 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-03-15 08:04:44 +0000 |
commit | 42fc590f8cabd23455949d002e2ddf28bbec6d1e (patch) | |
tree | cf521b1325e9d74d60ad5f75d0df1d12cf277cb0 /lib/PublicInbox/LeiSearch.pm | |
parent | 64b557420689476493d752968d99ab8ae62bad9a (diff) | |
download | public-inbox-42fc590f8cabd23455949d002e2ddf28bbec6d1e.tar.gz |
We only want to auto-import messages that are exclusively in remote externals. Messages in local externals are not auto-imported, to save space and reduce wear on the storage device.
Diffstat (limited to 'lib/PublicInbox/LeiSearch.pm')
-rw-r--r-- | lib/PublicInbox/LeiSearch.pm | 37 |
1 files changed, 24 insertions, 13 deletions
```diff
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index ceb3624b..2e3f10fd 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -44,29 +44,40 @@ sub content_key ($) {
 
 sub _cmp_1st { # git->cat_async callback
 	my ($bref, $oid, $type, $size, $cmp) = @_; # cmp: [chash, found, smsg]
-	return if defined($cmp->[1]->[0]); # $found->[0]
 	if (content_hash(PublicInbox::Eml->new($bref)) eq $cmp->[0]) {
-		push @{$cmp->[1]}, $cmp->[2]->{num};
+		$cmp->[1]->{$oid} = $cmp->[2]->{num};
 	}
 }
 
-# returns true if $eml is indexed by lei/store and keywords don't match
-sub kw_changed {
-	my ($self, $eml, $new_kw_sorted) = @_;
+sub xids_for { # returns { OID => docid } mapping for $eml matches
+	my ($self, $eml, $min) = @_;
 	my ($chash, $mids) = content_key($eml);
-	my $over = $self->over;
+	my @overs = ($self->over // $self->overs_all);
 	my $git = $self->git;
-	my $found = [];
+	my $found = {};
 	for my $mid (@$mids) {
-		my ($id, $prev);
-		while (my $cur = $over->next_by_mid($mid, \$id, \$prev)) {
-			$git->cat_async($cur->{blob}, \&_cmp_1st,
-					[ $chash, $found, $cur ]);
-			last if scalar(@$found);
+		for my $o (@overs) {
+			my ($id, $prev);
+			while (my $cur = $o->next_by_mid($mid, \$id, \$prev)) {
+				next if $found->{$cur->{blob}};
+				$git->cat_async($cur->{blob}, \&_cmp_1st,
+						[ $chash, $found, $cur ]);
+				if ($min && scalar(keys %$found) >= $min) {
+					$git->cat_async_wait;
+					return $found;
+				}
+			}
 		}
 	}
 	$git->cat_async_wait;
-	my $num = $found->[0] // return;
+	scalar(keys %$found) ? $found : undef;
+}
+
+# returns true if $eml is indexed by lei/store and keywords don't match
+sub kw_changed {
+	my ($self, $eml, $new_kw_sorted) = @_;
+	my $found = xids_for($self, $eml, 1) // return;
+	my ($num) = values %$found;
 	my @cur_kw = msg_keywords($self, $num);
 	join("\0", @$new_kw_sorted) eq join("\0", @cur_kw) ? 0 : 1;
 }
```