diff options
author | Eric Wong <e@80x24.org> | 2021-01-13 19:06:20 -1200 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-01-14 23:14:08 +0000 |
commit | 8591e28bd9acfb44bd07d9bf75c055dfe3edf1a7 (patch) | |
tree | 59b1cf8063b4f93d8415f7dbebc445ddf93660c7 /lib/PublicInbox/LeiDedupe.pm | |
parent | 14055cedd9fc6ec593528a243ff52b6684df70c0 (diff) | |
download | public-inbox-8591e28bd9acfb44bd07d9bf75c055dfe3edf1a7.tar.gz |
We'll be passing these objects via PublicInbox::IPC which uses Storable (or Sereal), so ensure they're safe to use after serialization.
Diffstat (limited to 'lib/PublicInbox/LeiDedupe.pm')
-rw-r--r-- | lib/PublicInbox/LeiDedupe.pm | 29 |
1 file changed, 16 insertions, 13 deletions
diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm
index 58eee533..81754361 100644
--- a/lib/PublicInbox/LeiDedupe.pm
+++ b/lib/PublicInbox/LeiDedupe.pm
@@ -43,9 +43,9 @@ sub smsg_hash ($) {
 }
 
 # the paranoid option
-sub dedupe_oid () {
-	my $skv = PublicInbox::SharedKV->new;
-	($skv, sub { # may be called in a child process
+sub dedupe_oid ($) {
+	my ($skv) = @_;
+	(sub { # may be called in a child process
 		my ($eml, $oid) = @_;
 		$skv->set_maybe(_oidbin($oid) // _regen_oid($eml), '');
 	}, sub {
@@ -55,9 +55,9 @@ sub dedupe_oid () {
 }
 
 # dangerous if there's duplicate messages with different Message-IDs
-sub dedupe_mid () {
-	my $skv = PublicInbox::SharedKV->new;
-	($skv, sub { # may be called in a child process
+sub dedupe_mid ($) {
+	my ($skv) = @_;
+	(sub { # may be called in a child process
 		my ($eml, $oid) = @_;
 		# TODO: lei will support non-public messages w/o Message-ID
 		my $mid = $eml->header_raw('Message-ID') // _oidbin($oid) //
@@ -73,9 +73,9 @@ sub dedupe_mid () {
 }
 
 # our default deduplication strategy (used by v2, also)
-sub dedupe_content () {
-	my $skv = PublicInbox::SharedKV->new;
-	($skv, sub { # may be called in a child process
+sub dedupe_content ($) {
+	my ($skv) = @_;
+	(sub { # may be called in a child process
 		my ($eml) = @_; # oid = $_[1], ignored
 		$skv->set_maybe(content_hash($eml), '');
 	}, sub {
@@ -86,7 +86,7 @@ sub dedupe_content () {
 
 # no deduplication at all
 sub true { 1 }
-sub dedupe_none () { (undef, \&true, \&true) }
+sub dedupe_none ($) { (\&true, \&true) }
 
 sub new {
 	my ($cls, $lei, $dst) = @_;
@@ -94,10 +94,12 @@ sub new {
 
 	# allow "none" to bypass Eml->new if writing to directory:
 	return if ($dd eq 'none' && substr($dst // '', -1) eq '/');
+	my $m = "dedupe_$dd";
+	$cls->can($m) or die "unsupported dedupe strategy: $dd\n";
+	my $skv = $dd eq 'none' ? undef : PublicInbox::SharedKV->new;
 
-	my $dd_new = $cls->can("dedupe_$dd") //
-			die "unsupported dedupe strategy: $dd\n";
-	bless [ $dd_new->() ], $cls; # [ $skv, $cb ]
+	# [ $skv, $eml_cb, $smsg_cb, "dedupe_$dd" ]
+	bless [ $skv, undef, undef, $m ], $cls;
 }
 
 # returns true on unseen messages according to the deduplication strategy,
@@ -115,6 +117,7 @@ sub is_smsg_dup {
 sub prepare_dedupe {
 	my ($self) = @_;
 	my $skv = $self->[0];
+	$self->[1] or @$self[1,2] = $self->can($self->[3])->($skv);
 	$skv ? $skv->dbh : undef;
 }
|