about summary refs log tree commit homepage
path: root/lib/PublicInbox/LeiMailSync.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/LeiMailSync.pm')
-rw-r--r--lib/PublicInbox/LeiMailSync.pm106
1 files changed, 73 insertions, 33 deletions
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 665206a8..c498421c 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -6,9 +6,12 @@ package PublicInbox::LeiMailSync;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::Lock);
+use PublicInbox::Compat qw(uniqstr);
 use DBI qw(:sql_types); # SQL_BLOB
 use PublicInbox::ContentHash qw(git_sha);
 use Carp ();
+use PublicInbox::Git qw(%HEXLEN2SHA);
+use PublicInbox::IO qw(read_all);
 
 sub dbh_new {
         my ($self) = @_;
@@ -339,6 +342,17 @@ SELECT $op(uid) FROM blob2num WHERE fid = ?
         $ret;
 }
 
+# must be called with lock
+sub _forget_fids ($;@) {
+        my $dbh = shift;
+        $dbh->begin_work;
+        for my $t (qw(blob2name blob2num folders)) {
+                my $sth = $dbh->prepare_cached("DELETE FROM $t WHERE fid = ?");
+                $sth->execute($_) for @_;
+        }
+        $dbh->commit;
+}
+
 # returns a { location => [ list-of-ids-or-names ] } mapping
 sub locations_for {
         my ($self, $oidbin) = @_;
@@ -379,18 +393,28 @@ sub locations_for {
 
         $sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1');
         my $ret = {};
+        my $drop_fids = $dbh->{ReadOnly} ? undef : {};
         while (my ($fid, $ids) = each %fid2id) {
                 $sth->execute($fid);
                 my ($loc) = $sth->fetchrow_array;
                 unless (defined $loc) {
+                        my $del = '';
+                        if ($drop_fids) {
+                                $del = ' (deleting)';
+                                $drop_fids->{$fid} = $fid;
+                        }
                         my $oidhex = unpack('H*', $oidbin);
-                        warn "E: fid=$fid for $oidhex unknown:\n", map {
-                                        'E: '.(ref() ? $$_ : "#$_")."\n";
+                        warn "E: fid=$fid for $oidhex stale/unknown:\n", map {
+                                        'E: '.(ref() ? $$_ : "#$_")."$del\n";
                                 } @$ids;
                         next;
                 }
                 $ret->{$loc} = $ids;
         }
+        if ($drop_fids && scalar(values %$drop_fids)) {
+                my $lk = $self->lock_for_scope;
+                _forget_fids($self->{dbh}, values %$drop_fids);
+        }
         scalar(keys %$ret) ? $ret : undef;
 }
 
@@ -401,9 +425,13 @@ sub folders {
         my $re;
         if (defined($pfx[0])) {
                 $sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre
-                $re = !!$pfx[1] ? '.*' : '';
-                $re .= quotemeta($pfx[0]);
-                $re .= '.*';
+                if (ref($pfx[0])) { # assume qr// "Regexp"
+                        $re = $pfx[0];
+                } else {
+                        $re = !!$pfx[1] ? '.*' : '';
+                        $re .= quotemeta($pfx[0]);
+                        $re .= '.*';
+                }
         }
         my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql);
         $sth->bind_param(1, $re) if defined($re);
@@ -411,15 +439,24 @@ sub folders {
         map { $_->[0] } @{$sth->fetchall_arrayref};
 }
 
+sub blob_mismatch ($$$) {
+        my ($f, $oidhex, $rawref) = @_;
+        my $sha = $HEXLEN2SHA{length($oidhex)};
+        my $got = git_sha($sha, $rawref)->hexdigest;
+        $got eq $oidhex ? undef : warn("$f changed $oidhex => $got\n");
+}
+
 sub local_blob {
         my ($self, $oidhex, $vrfy) = @_;
         my $dbh = $self->{dbh} //= dbh_new($self);
+        my $oidbin = pack('H*', $oidhex);
+
         my $b2n = $dbh->prepare(<<'');
 SELECT f.loc,b.name FROM blob2name b
 LEFT JOIN folders f ON b.fid = f.fid
 WHERE b.oidbin = ?
 
-        $b2n->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
+        $b2n->bind_param(1, $oidbin, SQL_BLOB);
         $b2n->execute;
         while (my ($d, $n) = $b2n->fetchrow_array) {
                 substr($d, 0, length('maildir:')) = '';
@@ -432,19 +469,28 @@ WHERE b.oidbin = ?
                         my $f = "$d/$x/$n";
                         open my $fh, '<', $f or next;
                         # some (buggy) Maildir writers are non-atomic:
-                        next unless -s $fh;
-                        local $/;
-                        my $raw = <$fh>;
-                        if ($vrfy) {
-                                my $got = git_sha(1, \$raw)->hexdigest;
-                                if ($got ne $oidhex) {
-                                        warn "$f changed $oidhex => $got\n";
-                                        next;
-                                }
-                        }
+                        my $raw = read_all($fh, -s $fh // next);
+                        next if $vrfy && blob_mismatch $f, $oidhex, \$raw;
                         return \$raw;
                 }
         }
+
+        # MH, except `uid' is not always unique (can be packed)
+        $b2n = $dbh->prepare(<<'');
+SELECT f.loc,b.uid FROM blob2num b
+LEFT JOIN folders f ON b.fid = f.fid
+WHERE b.oidbin = ? AND f.loc REGEXP '^mh:/'
+
+        $b2n->bind_param(1, $oidbin, SQL_BLOB);
+        $b2n->execute;
+        while (my ($f, $n) = $b2n->fetchrow_array) {
+                $f =~ s/\Amh://s or die "BUG: not MH: $f";
+                $f .= "/$n";
+                open my $fh, '<', $f or next;
+                my $raw = read_all($fh, -s $fh // next);
+                next if blob_mismatch $f, $oidhex, \$raw;
+                return \$raw;
+        }
         undef;
 }
 
@@ -520,20 +566,19 @@ EOM
 --all=@no not accepted (must be `local' and/or `remote')
 EOM
         }
-        my (%seen, @inc);
         my @all = $self->folders;
         for my $ok (@ok) {
                 if ($ok eq 'local') {
-                        @inc = grep(!m!\A[a-z0-9\+]+://!i, @all);
+                        push @$folders, grep(!m!\A[a-z0-9\+]+://!i, @all);
                 } elsif ($ok eq 'remote') {
-                        @inc = grep(m!\A[a-z0-9\+]+://!i, @all);
+                        push @$folders, grep(m!\A[a-z0-9\+]+://!i, @all);
                 } elsif ($ok ne '') {
                         return $lei->fail("--all=$all not understood");
                 } else {
-                        @inc = @all;
+                        push @$folders, @all;
                 }
-                push(@$folders, (grep { !$seen{$_}++ } @inc));
         }
+        @$folders = uniqstr @$folders;
         scalar(@$folders) || $lei->fail(<<EOM);
 no --mail-sync folders known to lei
 EOM
@@ -596,14 +641,10 @@ EOF
 sub forget_folders {
         my ($self, @folders) = @_;
         my $lk = $self->lock_for_scope;
-        for my $folder (@folders) {
-                my $fid = delete($self->{fmap}->{$folder}) //
-                        fid_for($self, $folder) // next;
-                for my $t (qw(blob2name blob2num folders)) {
-                        $self->{dbh}->do("DELETE FROM $t WHERE fid = ?",
-                                        undef, $fid);
-                }
-        }
+        _forget_fids($self->{dbh}, map {
+                delete($self->{fmap}->{$_}) //
+                        fid_for($self, $_) // ();
+        } @folders);
 }
 
 # only used for changing canonicalization errors
@@ -637,8 +678,8 @@ sub num_oidbin ($$$) {
 SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ? ORDER BY _rowid_
 EOM
         $sth->execute($fid, $uid);
-        my %uniq; # for public-inbox <= 1.7.0
-        grep { !$uniq{$_}++ } map { $_->[0] } @{$sth->fetchall_arrayref};
+        # for public-inbox <= 1.7.0:
+        uniqstr(map { $_->[0] } @{$sth->fetchall_arrayref});
 }
 
 sub name_oidbin ($$$) {
@@ -655,8 +696,7 @@ EOM
         $sth->bind_param(2, $nm, SQL_VARCHAR);
         $sth->execute;
         my @old = map { $_->[0] } @{$sth->fetchall_arrayref};
-        my %uniq; # for public-inbox <= 1.7.0
-        grep { !$uniq{$_}++ } (@bin, @old);
+        uniqstr @bin, @old # for public-inbox <= 1.7.0
 }
 
 sub imap_oidhex {