diff options
Diffstat (limited to 'lib/PublicInbox/LeiMailSync.pm')
-rw-r--r-- | lib/PublicInbox/LeiMailSync.pm | 106 |
1 files changed, 73 insertions, 33 deletions
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm index 665206a8..c498421c 100644 --- a/lib/PublicInbox/LeiMailSync.pm +++ b/lib/PublicInbox/LeiMailSync.pm @@ -6,9 +6,12 @@ package PublicInbox::LeiMailSync; use strict; use v5.10.1; use parent qw(PublicInbox::Lock); +use PublicInbox::Compat qw(uniqstr); use DBI qw(:sql_types); # SQL_BLOB use PublicInbox::ContentHash qw(git_sha); use Carp (); +use PublicInbox::Git qw(%HEXLEN2SHA); +use PublicInbox::IO qw(read_all); sub dbh_new { my ($self) = @_; @@ -339,6 +342,17 @@ SELECT $op(uid) FROM blob2num WHERE fid = ? $ret; } +# must be called with lock +sub _forget_fids ($;@) { + my $dbh = shift; + $dbh->begin_work; + for my $t (qw(blob2name blob2num folders)) { + my $sth = $dbh->prepare_cached("DELETE FROM $t WHERE fid = ?"); + $sth->execute($_) for @_; + } + $dbh->commit; +} + # returns a { location => [ list-of-ids-or-names ] } mapping sub locations_for { my ($self, $oidbin) = @_; @@ -379,18 +393,28 @@ sub locations_for { $sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1'); my $ret = {}; + my $drop_fids = $dbh->{ReadOnly} ? undef : {}; while (my ($fid, $ids) = each %fid2id) { $sth->execute($fid); my ($loc) = $sth->fetchrow_array; unless (defined $loc) { + my $del = ''; + if ($drop_fids) { + $del = ' (deleting)'; + $drop_fids->{$fid} = $fid; + } my $oidhex = unpack('H*', $oidbin); - warn "E: fid=$fid for $oidhex unknown:\n", map { - 'E: '.(ref() ? $$_ : "#$_")."\n"; + warn "E: fid=$fid for $oidhex stale/unknown:\n", map { + 'E: '.(ref() ? $$_ : "#$_")."$del\n"; } @$ids; next; } $ret->{$loc} = $ids; } + if ($drop_fids && scalar(values %$drop_fids)) { + my $lk = $self->lock_for_scope; + _forget_fids($self->{dbh}, values %$drop_fids); + } scalar(keys %$ret) ? $ret : undef; } @@ -401,9 +425,13 @@ sub folders { my $re; if (defined($pfx[0])) { $sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre - $re = !!$pfx[1] ? '.*' : ''; - $re .= quotemeta($pfx[0]); - $re .= '.*'; + if (ref($pfx[0])) { # assume qr// "Regexp" + $re = $pfx[0]; + } else { + $re = !!$pfx[1] ? '.*' : ''; + $re .= quotemeta($pfx[0]); + $re .= '.*'; + } } my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql); $sth->bind_param(1, $re) if defined($re); @@ -411,15 +439,24 @@ sub folders { map { $_->[0] } @{$sth->fetchall_arrayref}; } +sub blob_mismatch ($$$) { + my ($f, $oidhex, $rawref) = @_; + my $sha = $HEXLEN2SHA{length($oidhex)}; + my $got = git_sha($sha, $rawref)->hexdigest; + $got eq $oidhex ? undef : warn("$f changed $oidhex => $got\n"); +} + sub local_blob { my ($self, $oidhex, $vrfy) = @_; my $dbh = $self->{dbh} //= dbh_new($self); + my $oidbin = pack('H*', $oidhex); + my $b2n = $dbh->prepare(<<''); SELECT f.loc,b.name FROM blob2name b LEFT JOIN folders f ON b.fid = f.fid WHERE b.oidbin = ? - $b2n->bind_param(1, pack('H*', $oidhex), SQL_BLOB); + $b2n->bind_param(1, $oidbin, SQL_BLOB); $b2n->execute; while (my ($d, $n) = $b2n->fetchrow_array) { substr($d, 0, length('maildir:')) = ''; @@ -432,19 +469,28 @@ WHERE b.oidbin = ? my $f = "$d/$x/$n"; open my $fh, '<', $f or next; # some (buggy) Maildir writers are non-atomic: - next unless -s $fh; - local $/; - my $raw = <$fh>; - if ($vrfy) { - my $got = git_sha(1, \$raw)->hexdigest; - if ($got ne $oidhex) { - warn "$f changed $oidhex => $got\n"; - next; - } - } + my $raw = read_all($fh, -s $fh // next); + next if $vrfy && blob_mismatch $f, $oidhex, \$raw; return \$raw; } } + + # MH, except `uid' is not always unique (can be packed) + $b2n = $dbh->prepare(<<''); +SELECT f.loc,b.uid FROM blob2num b +LEFT JOIN folders f ON b.fid = f.fid +WHERE b.oidbin = ? AND f.loc REGEXP '^mh:/' + + $b2n->bind_param(1, $oidbin, SQL_BLOB); + $b2n->execute; + while (my ($f, $n) = $b2n->fetchrow_array) { + $f =~ s/\Amh://s or die "BUG: not MH: $f"; + $f .= "/$n"; + open my $fh, '<', $f or next; + my $raw = read_all($fh, -s $fh // next); + next if blob_mismatch $f, $oidhex, \$raw; + return \$raw; + } undef; } @@ -520,20 +566,19 @@ EOM --all=@no not accepted (must be `local' and/or `remote') EOM } - my (%seen, @inc); my @all = $self->folders; for my $ok (@ok) { if ($ok eq 'local') { - @inc = grep(!m!\A[a-z0-9\+]+://!i, @all); + push @$folders, grep(!m!\A[a-z0-9\+]+://!i, @all); } elsif ($ok eq 'remote') { - @inc = grep(m!\A[a-z0-9\+]+://!i, @all); + push @$folders, grep(m!\A[a-z0-9\+]+://!i, @all); } elsif ($ok ne '') { return $lei->fail("--all=$all not understood"); } else { - @inc = @all; + push @$folders, @all; } - push(@$folders, (grep { !$seen{$_}++ } @inc)); } + @$folders = uniqstr @$folders; scalar(@$folders) || $lei->fail(<<EOM); no --mail-sync folders known to lei EOM @@ -596,14 +641,10 @@ EOF sub forget_folders { my ($self, @folders) = @_; my $lk = $self->lock_for_scope; - for my $folder (@folders) { - my $fid = delete($self->{fmap}->{$folder}) // - fid_for($self, $folder) // next; - for my $t (qw(blob2name blob2num folders)) { - $self->{dbh}->do("DELETE FROM $t WHERE fid = ?", - undef, $fid); - } - } + _forget_fids($self->{dbh}, map { + delete($self->{fmap}->{$_}) // + fid_for($self, $_) // (); + } @folders); } # only used for changing canonicalization errors @@ -637,8 +678,8 @@ sub num_oidbin ($$$) { SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ? ORDER BY _rowid_ EOM $sth->execute($fid, $uid); - my %uniq; # for public-inbox <= 1.7.0 - grep { !$uniq{$_}++ } map { $_->[0] } @{$sth->fetchall_arrayref}; + # for public-inbox <= 1.7.0: + uniqstr(map { $_->[0] } @{$sth->fetchall_arrayref}); } sub name_oidbin ($$$) { @@ -655,8 +696,7 @@ EOM $sth->bind_param(2, $nm, SQL_VARCHAR); $sth->execute; my @old = map { $_->[0] } @{$sth->fetchall_arrayref}; - my %uniq; # for public-inbox <= 1.7.0 - grep { !$uniq{$_}++ } (@bin, @old); + uniqstr @bin, @old # for public-inbox <= 1.7.0 } sub imap_oidhex { |