about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2023-12-29 18:05:14 +0000
committerEric Wong <e@80x24.org>2023-12-30 06:44:02 +0000
commit5aab49f319679cf7912f1abf4914272e5112e247 (patch)
treefeb93cd9b7062e61dd547b96fce91b5231bade88
parentcf977e706b07e80f394570a393eb2169b9b9a1a7 (diff)
downloadpublic-inbox-5aab49f319679cf7912f1abf4914272e5112e247.tar.gz
The MH format is widely-supported and used by various MUAs such
as mutt and sylpheed, and a MH-like format is used by mlmmj for
archives, as well.  Locking implementations for writes are
inconsistent, so this commit doesn't support writes, yet.

inotify|EVFILT_VNODE watches aren't supported, yet, but that'll
have to come since MH allows packing unused integers and
renaming files.
-rw-r--r--MANIFEST3
-rw-r--r--lib/PublicInbox/LEI.pm13
-rw-r--r--lib/PublicInbox/LeiConvert.pm5
-rw-r--r--lib/PublicInbox/LeiImport.pm23
-rw-r--r--lib/PublicInbox/LeiImportKw.pm2
-rw-r--r--lib/PublicInbox/LeiIndex.pm2
-rw-r--r--lib/PublicInbox/LeiInput.pm52
-rw-r--r--lib/PublicInbox/LeiMailSync.pm40
-rw-r--r--lib/PublicInbox/LeiToMail.pm5
-rw-r--r--lib/PublicInbox/MHreader.pm103
-rw-r--r--lib/PublicInbox/MdirReader.pm2
-rw-r--r--lib/PublicInbox/MdirSort.pm46
-rw-r--r--lib/PublicInbox/TestCommon.pm22
-rw-r--r--t/mh_reader.t107
14 files changed, 392 insertions, 33 deletions
diff --git a/MANIFEST b/MANIFEST
index 109ce88a..051cd6f9 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -296,6 +296,7 @@ lib/PublicInbox/Linkify.pm
 lib/PublicInbox/Listener.pm
 lib/PublicInbox/Lock.pm
 lib/PublicInbox/MDA.pm
+lib/PublicInbox/MHreader.pm
 lib/PublicInbox/MID.pm
 lib/PublicInbox/MIME.pm
 lib/PublicInbox/MailDiff.pm
@@ -305,6 +306,7 @@ lib/PublicInbox/MboxGz.pm
 lib/PublicInbox/MboxLock.pm
 lib/PublicInbox/MboxReader.pm
 lib/PublicInbox/MdirReader.pm
+lib/PublicInbox/MdirSort.pm
 lib/PublicInbox/MiscIdx.pm
 lib/PublicInbox/MiscSearch.pm
 lib/PublicInbox/MsgIter.pm
@@ -547,6 +549,7 @@ t/mda-mime.eml
 t/mda.t
 t/mda_filter_rubylang.t
 t/mdir_reader.t
+t/mh_reader.t
 t/mid.t
 t/mime.t
 t/miscsearch.t
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 17431518..e0cfd55a 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -267,7 +267,7 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
         'one-time import/update from URL or filesystem',
         qw(stdin| offset=i recursive|r exclude=s include|I=s new-only
         lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!
-        commit-delay=i),
+        commit-delay=i sort|s:s@),
         @net_opt, @c_opt ],
 'forget-mail-sync' => [ 'LOCATION...',
         'forget sync information for a mail folder', @c_opt ],
@@ -280,7 +280,7 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
 'convert' => [ 'LOCATION...|--stdin',
         'one-time conversion from URL or filesystem to another format',
         qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!
-                rsyncable),
+                rsyncable sort|s:s@),
         @net_opt, @c_opt ],
 'p2q' => [ 'LOCATION_OR_COMMIT...|--stdin',
         "use a patch to generate a query for `lei q --stdin'",
@@ -321,6 +321,9 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
 my $stdin_formats = [ 'MAIL_FORMAT|eml|mboxrd|mboxcl2|mboxcl|mboxo',
                         'specify message input format' ];
 my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ];
+my $sort_out = [ 'VAL|received|relevance|docid',
+                "order of results is `--output'-dependent"];
+my $sort_in = [ 'sequence|mtime|size', 'sort input (format-dependent)' ];
 
 # we use \x{a0} (non-breaking SP) to avoid wrapping in PublicInbox::LeiHelp
 my %OPTDESC = (
@@ -428,8 +431,10 @@ my %OPTDESC = (
 'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ],
 'offset=i' => ['OFF', 'search result offset (default: 0)'],
 
-'sort|s=s' => [ 'VAL|received|relevance|docid',
-                "order of results is `--output'-dependent"],
+'sort|s=s        q' => $sort_out,
+'sort|s=s        lcat' => $sort_out,
+'sort|s:s@        convert' => $sort_in,
+'sort|s:s@        import' => $sort_in,
 'reverse|r' => 'reverse search results', # like sort(1)
 
 'boost=i' => 'increase/decrease priority of results (default: 0)',
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 8f628562..17a952f2 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -28,6 +28,11 @@ sub input_maildir_cb {
         $self->{wcb}->(undef, { kw => $kw }, $eml);
 }
 
+sub input_mh_cb {
+        my ($dn, $bn, $kw, $eml, $self) = @_;
+        $self->{wcb}->(undef, { kw => $kw }, $eml);
+}
+
 sub process_inputs { # via wq_do
         my ($self) = @_;
         local $PublicInbox::DS::in_loop = 0; # force synchronous awaitpid
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index c2552bf0..5521188c 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -53,6 +53,29 @@ sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
         }
 }
 
+sub input_mh_cb {
+        my ($mhdir, $n, $kw, $eml, $self) = @_;
+        substr($mhdir, 0, 0) = 'mh:'; # add prefix
+        my $lse = $self->{lse} //= $self->{lei}->{sto}->search;
+        my $lms = $self->{-lms_rw} //= $self->{lei}->lms; # may be 0 or undef
+        my @oidbin = $lms ? $lms->num_oidbin($mhdir, $n) : ();
+        @oidbin > 1 and warn("W: $mhdir/$n not unique:\n",
+                                map { "\t".unpack('H*', $_)."\n" } @oidbin);
+        my @docids = sort { $a <=> $b } uniqstr
+                        map { $lse->over->oidbin_exists($_) } @oidbin;
+        if (scalar @docids) {
+                $lse->kw_changed(undef, $kw, \@docids) or return;
+        }
+        if (defined $eml) {
+                my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
+                $vmd->{sync_info} = [ $mhdir, $n + 0 ] if $self->{-mail_sync};
+                $self->input_eml_cb($eml, $vmd);
+        }
+        # TODO:
+        # elsif (my $ikw = $self->{lei}->{ikw}) { # old message, kw only
+        #        $ikw->wq_io_do('ck_update_kw', [], "mh:$dir", $uid, $kw);
+}
+
 sub input_net_cb { # imap_each / nntp_each
         my ($uri, $uid, $kw, $eml, $self) = @_;
         if (defined $eml) {
diff --git a/lib/PublicInbox/LeiImportKw.pm b/lib/PublicInbox/LeiImportKw.pm
index 4b8e69fb..765e23cd 100644
--- a/lib/PublicInbox/LeiImportKw.pm
+++ b/lib/PublicInbox/LeiImportKw.pm
@@ -36,7 +36,7 @@ sub ipc_atfork_child {
 sub ck_update_kw { # via wq_io_do
         my ($self, $url, $uid, $kw) = @_;
         my @oidbin = $self->{-lms_rw}->num_oidbin($url, $uid);
-        my $uid_url = "$url/;UID=$uid";
+        my $uid_url = index($url, 'mh:') == 0 ? $url.$uid : "$url/;UID=$uid";
         @oidbin > 1 and warn("W: $uid_url not unique:\n",
                                 map { "\t".unpack('H*', $_)."\n" } @oidbin);
         my @docids = sort { $a <=> $b } uniqstr
diff --git a/lib/PublicInbox/LeiIndex.pm b/lib/PublicInbox/LeiIndex.pm
index b3f3e1a0..0e329e58 100644
--- a/lib/PublicInbox/LeiIndex.pm
+++ b/lib/PublicInbox/LeiIndex.pm
@@ -35,7 +35,7 @@ sub lei_index {
 
 no warnings 'once';
 no strict 'refs';
-for my $m (qw(pmdir_cb input_net_cb)) {
+for my $m (qw(pmdir_cb input_net_cb input_mh_cb)) {
         *$m = PublicInbox::LeiImport->can($m);
 }
 
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index daba9a8e..947a7a79 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -69,6 +69,11 @@ sub input_maildir_cb {
         $self->input_eml_cb($eml);
 }
 
+sub input_mh_cb {
+        my ($dn, $n, $kw, $eml, $self) = @_;
+        $self->input_eml_cb($eml);
+}
+
 sub input_net_cb { # imap_each, nntp_each cb
         my ($url, $uid, $kw, $eml, $self) = @_;
         $self->input_eml_cb($eml);
@@ -190,7 +195,7 @@ sub input_path_url {
                 $ifmt = lc($1);
         } elsif ($input =~ /\.(?:patch|eml)\z/i) {
                 $ifmt = 'eml';
-        } elsif (-f $input && $input =~ m{\A(?:.+)/(?:new|cur)/([^/]+)\z}) {
+        } elsif ($input =~ m{\A(?:.+)/(?:new|cur)/([^/]+)\z} && -f $input) {
                 my $bn = $1;
                 my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn);
                 return if index($fl, 'T') >= 0;
@@ -204,6 +209,10 @@ sub input_path_url {
         my $devfd = $lei->path_to_fd($input) // return;
         if ($devfd >= 0) {
                 $self->input_fh($ifmt, $lei->{$devfd}, $input, @args);
+        } elsif ($devfd < 0 && $input =~ m{\A(.+/)([0-9]+)\z} && -f $input) {
+                my ($dn, $n) = ($1, $2);
+                my $mhr = PublicInbox::MHreader->new($dn, $lei->{3});
+                $mhr->mh_read_one($n, $self->can('input_mh_cb'), $self);
         } elsif (-f $input && $ifmt eq 'eml') {
                 open my $fh, '<', $input or
                                         return $lei->fail("open($input): $!");
@@ -231,6 +240,10 @@ sub input_path_url {
                                                 $self->can('input_maildir_cb'),
                                                 $self, @args);
                 }
+        } elsif (-d _ && $ifmt eq 'mh') {
+                my $mhr = PublicInbox::MHreader->new($input.'/', $lei->{3});
+                $mhr->{sort} = $lei->{opt}->{sort};
+                $mhr->mh_each_eml($self->can('input_mh_cb'), $self, @args);
         } elsif (-d _ && $ifmt =~ /\A(?:v1|v2)\z/) {
                 my $ibx = PublicInbox::Inbox->new({inboxdir => $input});
                 each_ibx_eml($self, $ibx, @args);
@@ -354,13 +367,15 @@ sub prepare_inputs { # returns undef on error
                                 PublicInbox::MboxReader->reads($ifmt) or return
                                         $lei->fail("$ifmt not supported");
                         } elsif (-d $input_path) { # TODO extindex
-                                $ifmt =~ /\A(?:maildir|v1|v2|extindex)\z/ or
+                                $ifmt =~ /\A(?:maildir|mh|v1|v2|extindex)\z/ or
                                         return$lei->fail("$ifmt not supported");
                                 $input = $input_path;
                                 add_dir $lei, $istate, $ifmt, \$input;
-                        } elsif ($self->{missing_ok} && !-e _) {
+                        } elsif ($self->{missing_ok} &&
+                                        $ifmt =~ /\A(?:maildir|mh)\z/ &&
+                                        !-e $input_path) {
                                 # for "lei rm-watch" on missing Maildir
-                                $may_sync and $input = 'maildir:'.
+                                $may_sync and $input = "$ifmt:".
                                                 $lei->abs_path($input_path);
                         } else {
                                 my $m = "Unable to handle $input";
@@ -373,7 +388,7 @@ sub prepare_inputs { # returns undef on error
 $input is `eml', not --in-format=$in_fmt
 
                         push @{$sync->{no}}, $input if $sync;
-                } elsif (-f $input && $input =~ m{\A(.+)/(new|cur)/([^/]+)\z}) {
+                } elsif ($input =~ m{\A(.+)/(new|cur)/([^/]+)\z} && -f $input) {
                         # single file in a Maildir
                         my ($mdir, $nc, $bn) = ($1, $2, $3);
                         my $other = $mdir . ($nc eq 'new' ? '/cur' : '/new');
@@ -385,12 +400,24 @@ $input is `eml', not --in-format=$in_fmt
 
                         if ($sync) {
                                 $input = $lei->abs_path($mdir) . "/$nc/$bn";
-                                push @{$sync->{ok}}, $input if $sync;
+                                push @{$sync->{ok}}, $input;
                         }
                         require PublicInbox::MdirReader;
                 } else {
                         my $devfd = $lei->path_to_fd($input) // return;
-                        if ($devfd >= 0 || -f $input || -p _) {
+                        if ($devfd < 0 && $input =~ m{\A(.+)/([0-9]+)\z} &&
+                                        -f $input) { # single file in MH dir
+                                my ($mh, $n) = ($1, $2);
+                                lc($in_fmt//'eml') eq 'eml' or
+                                                return $lei->fail(<<"");
+$input is `eml', not --in-format=$in_fmt
+
+                                if ($sync) {
+                                        $input = $lei->abs_path($mh)."/$n";
+                                        push @{$sync->{ok}}, $input;
+                                }
+                                require PublicInbox::MHreader;
+                        } elsif ($devfd >= 0 || -f $input || -p _) {
                                 push @{$sync->{no}}, $input if $sync;
                                 push @f, $input;
                         } elsif (-d "$input/new" && -d "$input/cur") {
@@ -401,10 +428,13 @@ $input is `eml', not --in-format=$in_fmt
                                 add_dir $lei, $istate, 'v1', \$input;
                         } elsif (-e "$input/ei.lock") {
                                 add_dir $lei, $istate, 'extindex', \$input;
+                        } elsif (-f "$input/.mh_sequences") {
+                                add_dir $lei, $istate, 'mh', \$input;
                         } elsif ($self->{missing_ok} && !-e $input) {
                                 if ($lei->{cmd} eq 'p2q') {
                                         # will run "git format-patch"
                                 } elsif ($may_sync) { # for lei rm-watch
+                                        # FIXME: support MH, here
                                         $input = 'maildir:'.
                                                 $lei->abs_path($input);
                                 }
@@ -446,6 +476,14 @@ $input is `eml', not --in-format=$in_fmt
                         $lei->refresh_watches;
                 }
         }
+        if (my $mh = $istate->{mh}) {
+                require PublicInbox::MHreader;
+                grep(!m!\Amh:!i, @$mh) and die "BUG: @$mh (no pfx)";
+                if ($may_sync && $lei->{sto}) {
+                        $lei->lms(1)->lms_write_prepare->add_folders(@$mh);
+                        # $lei->refresh_watches; TODO
+                }
+        }
         require PublicInbox::ExtSearch if $istate->{extindex};
         $self->{inputs} = $inputs;
 }
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 17254a82..593715dc 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -435,15 +435,24 @@ sub folders {
         map { $_->[0] } @{$sth->fetchall_arrayref};
 }
 
+sub blob_mismatch ($$$) {
+        my ($f, $oidhex, $rawref) = @_;
+        my $sha = $HEXLEN2SHA{length($oidhex)};
+        my $got = git_sha($sha, $rawref)->hexdigest;
+        $got eq $oidhex ? undef : warn("$f changed $oidhex => $got\n");
+}
+
 sub local_blob {
         my ($self, $oidhex, $vrfy) = @_;
         my $dbh = $self->{dbh} //= dbh_new($self);
+        my $oidbin = pack('H*', $oidhex);
+
         my $b2n = $dbh->prepare(<<'');
 SELECT f.loc,b.name FROM blob2name b
 LEFT JOIN folders f ON b.fid = f.fid
 WHERE b.oidbin = ?
 
-        $b2n->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
+        $b2n->bind_param(1, $oidbin, SQL_BLOB);
         $b2n->execute;
         while (my ($d, $n) = $b2n->fetchrow_array) {
                 substr($d, 0, length('maildir:')) = '';
@@ -456,19 +465,28 @@ WHERE b.oidbin = ?
                         my $f = "$d/$x/$n";
                         open my $fh, '<', $f or next;
                         # some (buggy) Maildir writers are non-atomic:
-                        next unless -s $fh;
-                        my $raw = read_all($fh, -s _);
-                        if ($vrfy) {
-                                my $sha = $HEXLEN2SHA{length($oidhex)};
-                                my $got = git_sha($sha, \$raw)->hexdigest;
-                                if ($got ne $oidhex) {
-                                        warn "$f changed $oidhex => $got\n";
-                                        next;
-                                }
-                        }
+                        my $raw = read_all($fh, -s $fh // next);
+                        next if $vrfy && blob_mismatch $f, $oidhex, \$raw;
                         return \$raw;
                 }
         }
+
+        # MH, except `uid' is not always unique (can be packed)
+        $b2n = $dbh->prepare(<<'');
+SELECT f.loc,b.uid FROM blob2num b
+LEFT JOIN folders f ON b.fid = f.fid
+WHERE b.oidbin = ? AND f.loc REGEXP '^mh:/'
+
+        $b2n->bind_param(1, $oidbin, SQL_BLOB);
+        $b2n->execute;
+        while (my ($f, $n) = $b2n->fetchrow_array) {
+                $f =~ s/\Amh://s or die "BUG: not MH: $f";
+                $f .= "/$n";
+                open my $fh, '<', $f or next;
+                my $raw = read_all($fh, -s $fh // next);
+                next if blob_mismatch $f, $oidhex, \$raw;
+                return \$raw;
+        }
         undef;
 }
 
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 071ba113..de75e99e 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -400,6 +400,11 @@ sub new {
                                 "$dst exists and is not a directory\n";
                 $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
                 $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
+        } elsif ($fmt eq 'mh') {
+                -e $dst && !-d _ and die
+                                "$dst exists and is not a directory\n";
+                $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
+                $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
         } elsif (substr($fmt, 0, 4) eq 'mbox') {
                 require PublicInbox::MboxReader;
                 $self->can("eml2$fmt") or die "bad mbox format: $fmt\n";
diff --git a/lib/PublicInbox/MHreader.pm b/lib/PublicInbox/MHreader.pm
new file mode 100644
index 00000000..673e3e06
--- /dev/null
+++ b/lib/PublicInbox/MHreader.pm
@@ -0,0 +1,103 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# MH reader, based on Lib/mailbox.py in cpython source
+package PublicInbox::MHreader;
+use v5.12;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::OnDestroy;
+use PublicInbox::IO qw(try_cat);
+use PublicInbox::MdirSort;
+use Carp qw(carp);
+use autodie qw(chdir closedir opendir);
+
+my %FL2OFF = ( # mh_sequences key => our keyword
+        replied => 0,
+        flagged => 1,
+        unseen => 2, # negate
+);
+my @OFF2KW = qw(answered flagged); # [2] => unseen (negated)
+
+sub new {
+        my ($cls, $dir, $cwdfh) = @_;
+        if (substr($dir, -1) ne '/') { # TODO: do this earlier
+                carp "W: appending `/' to `$dir' (fix caller)\n";
+                $dir .= '/';
+        }
+        bless { dir => $dir, cwdfh => $cwdfh }, $cls;
+}
+
+sub read_mh_sequences ($) { # caller must chdir($self->{dir})
+        my ($self) = @_;
+        my ($fl, $off, @n);
+        my @seq = ('', '', '');
+        for (split /\n+/s, try_cat('.mh_sequences')) {
+                ($fl, @n) = split /[: \t]+/;
+                $off = $FL2OFF{$fl} // do { warn <<EOM;
+W: unknown `$fl' in $self->{dir}.mh_sequences (ignoring)
+EOM
+                        next;
+                };
+                @n = grep /\A[0-9]+\z/s, @n; # don't stat, yet
+                if (@n) {
+                        @n = sort { $b <=> $a } @n; # to avoid resize
+                        my $buf = '';
+                        vec($buf, $_, 1) = 1 for @n;
+                        $seq[$off] = $buf;
+                }
+        }
+        \@seq;
+}
+
+sub mh_each_file {
+        my ($self, $efcb, @arg) = @_;
+        opendir(my $dh, my $dir = $self->{dir});
+        my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh});
+        chdir($dh);
+        if (defined(my $sort = $self->{sort})) {
+                my @sort = map {
+                        my @tmp = $_ eq '' ? ('sequence') : split(/[, ]/);
+                        # sorting by name alphabetically makes no sense for MH:
+                        for my $k (@tmp) {
+                                s/\A(\-|\+|)(?:name|)\z/$1sequence/;
+                        }
+                        @tmp;
+                } @$sort;
+                my @n = grep /\A[0-9]+\z/s, readdir $dh;
+                mdir_sort \@n, \@sort;
+                $efcb->($dir, $_, $self, @arg) for @n;
+        } else {
+                while (readdir $dh) { # perl v5.12+ to set $_ on readdir
+                        $efcb->($dir, $_, $self, @arg) if /\A[0-9]+\z/s;
+                }
+        }
+        closedir $dh; # may die
+}
+
+sub kw_for ($$) {
+        my ($self, $n) = @_;
+        my $seq = $self->{mh_seq} //= read_mh_sequences($self);
+        my @kw = map { vec($seq->[$_], $n, 1) ? $OFF2KW[$_] : () } (0, 1);
+        vec($seq->[2], $n, 1) or push @kw, 'seen';
+        \@kw;
+}
+
+sub _file2eml { # mh_each_file cb
+        my ($dir, $n, $self, $ucb, @arg) = @_;
+        my $eml = eml_from_path($n);
+        $ucb->($dir, $n, kw_for($self, $n), $eml, @arg) if $eml;
+}
+
+sub mh_each_eml {
+        my ($self, $ucb, @arg) = @_;
+        mh_each_file($self, \&_file2eml, $ucb, @arg);
+}
+
+sub mh_read_one {
+        my ($self, $n, $ucb, @arg) = @_;
+        my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh});
+        chdir(my $dir = $self->{dir});
+        _file2eml($dir, $n, $self, $ucb, @arg);
+}
+
+1;
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index db5f4545..2981b058 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -1,7 +1,7 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# Maildirs for now, MH eventually
+# Maildirs only (PublicInbox::MHreader exists, now)
 # ref: https://cr.yp.to/proto/maildir.html
 #        https://wiki2.dovecot.org/MailboxFormat/Maildir
 package PublicInbox::MdirReader;
diff --git a/lib/PublicInbox/MdirSort.pm b/lib/PublicInbox/MdirSort.pm
new file mode 100644
index 00000000..6bd9fb6c
--- /dev/null
+++ b/lib/PublicInbox/MdirSort.pm
@@ -0,0 +1,46 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# used for sorting MH (and (TODO) Maildir) names
+# TODO: consider sort(1) to parallelize sorting of gigantic directories
+package PublicInbox::MdirSort;
+use v5.12;
+use Time::HiRes ();
+use parent qw(Exporter);
+use Fcntl qw(S_ISREG);
+our @EXPORT = qw(mdir_sort);
+my %ST = (sequence => 0, size => 1, atime => 2, mtime => 3, ctime => 4);
+
+sub mdir_sort ($$;$) {
+        my ($ent, $sort, $max) = @_;
+        my @st;
+        my @ent = map {
+                @st = Time::HiRes::stat $_;
+                # name, size, {a,m,c}time
+                S_ISREG($st[2]) ? [ $_, @st[7..10] ] : ();
+        } @$ent;
+        @ent = grep { $_->[1] <= $max } @ent if $max;
+        use sort 'stable';
+        for my $s (@$sort) {
+                if ($s =~ /\A(\-|\+|)name\z/) {
+                        if ($1 eq '-') {
+                                @ent = sort { $b->[0] cmp $a->[0] } @ent;
+                        } else {
+                                @ent = sort { $a->[0] cmp $b->[0] } @ent;
+                        }
+                } elsif ($s =~ /\A(\-|\+|)
+                                (sequence|size|ctime|mtime|atime)\z/x) {
+                        my $key = $ST{$2};
+                        if ($1 eq '-') {
+                                @ent = sort { $b->[$key] <=> $a->[$key] } @ent;
+                        } else {
+                                @ent = sort { $a->[$key] <=> $b->[$key] } @ent;
+                        }
+                } else {
+                        die "E: unrecognized sort parameter: `$s'";
+                }
+        }
+        @$ent = map { $_->[0] } @ent;
+}
+
+1;
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index b0f28e16..d20bff28 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -24,6 +24,7 @@ BEGIN {
         @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
                 run_script start_script key2sub xsys xsys_e xqx eml_load tick
                 have_xapian_compact json_utf8 setup_public_inboxes create_inbox
+                create_dir
                 create_coderepo require_bsd kernel_version check_broken_tmpfs
                 quit_waiter_pipe wait_for_eof require_git_http_backend
                 tcp_host_port test_lei lei lei_ok $lei_out $lei_err $lei_opt
@@ -843,26 +844,24 @@ sub my_sum {
         substr PublicInbox::SHA::sha256_hex(join('', @l)), 0, 8;
 }
 
-sub create_coderepo ($$;@) {
-        my $ident = shift;
-        my $cb = pop;
+sub create_dir (@) {
+        my ($ident, $cb) = (shift, pop);
         my %opt = @_;
         require PublicInbox::Lock;
         require PublicInbox::Import;
-        my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
-        my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
         my $tmpdir = delete $opt{tmpdir};
-        my $dir = "t/data-gen/$base.$ident-".my_sum($db, $cb, \%opt);
+        my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
+        my $dir = "t/data-gen/$base.$ident-".my_sum($cb, \%opt);
         require File::Path;
         my $new = File::Path::make_path($dir);
         my $lk = PublicInbox::Lock->new("$dir/creat.lock");
         my $scope = $lk->lock_for_scope;
         if (!-f "$dir/creat.stamp") {
-                opendir(my $dfh, '.');
+                opendir(my $cwd, '.');
                 chdir($dir);
                 local %ENV = (%ENV, %COMMIT_ENV);
                 $cb->($dir);
-                chdir($dfh);
+                chdir($cwd); # some $cb chdir around
                 open my $s, '>', "$dir/creat.stamp";
         }
         return $dir if !defined($tmpdir);
@@ -870,6 +869,13 @@ sub create_coderepo ($$;@) {
         $tmpdir;
 }
 
+sub create_coderepo (@) {
+        my $ident = shift;
+        require PublicInbox::Import;
+        my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
+        create_dir "$ident-$db", @_;
+}
+
 sub create_inbox ($;@) {
         my $ident = shift;
         my $cb = pop;
diff --git a/t/mh_reader.t b/t/mh_reader.t
new file mode 100644
index 00000000..e8f69fa8
--- /dev/null
+++ b/t/mh_reader.t
@@ -0,0 +1,107 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use PublicInbox::TestCommon;
+require_ok 'PublicInbox::MHreader';
+use PublicInbox::IO qw(write_file);
+use PublicInbox::Lock;
+use PublicInbox::OnDestroy;
+use PublicInbox::Eml;
+use autodie;
+opendir my $cwdfh, '.';
+
+my $normal = create_dir 'normal', sub {
+        write_file '>', 3, "Subject: replied a\n\n";
+        write_file '>', 4, "Subject: replied b\n\n";
+        write_file '>', 1, "Subject: unseen\n\n";
+        write_file '>', 2, "Subject: unseen flagged\n\n";
+        write_file '>', '.mh_sequences', <<EOM;
+unseen: 1 2
+flagged: 2
+replied: 3 4
+EOM
+};
+
+my $for_sort = create_dir 'size', sub {
+        for (1..3) {
+                my $name = 10 - $_;
+                write_file '>', $name, "Subject: ".($_ x $_)."\n\n";
+        }
+};
+
+my $stale = create_dir 'stale', sub {
+        write_file '>', 4, "Subject: msg 4\n\n";
+        write_file '>', '.mh_sequences', <<EOM;
+unseen: 1 2
+EOM
+};
+
+{
+        my $mhr = PublicInbox::MHreader->new("$normal/", $cwdfh);
+        $mhr->{sort} = [ '' ];
+        my @res;
+        $mhr->mh_each_eml(sub { push @res, \@_; }, [ 'bogus' ]);
+        is scalar(@res), 4, 'got 4 messages' or diag explain(\@res);
+        is_deeply [map { $_->[1] } @res], [1, 2, 3, 4],
+                'got messages in expected order';
+        is scalar(grep { $_->[4]->[0] eq 'bogus' } @res), scalar(@res),
+                'cb arg passed to all messages' or diag explain(\@res);
+
+        $mhr = PublicInbox::MHreader->new("$stale/", $cwdfh);
+        @res = ();
+        $mhr->mh_each_eml(sub { push @res, \@_; });
+        is scalar(@res), 1, 'ignored stale messages';
+}
+
+test_lei(sub {
+        lei_ok qw(convert -f mboxrd), $normal;
+        my @msgs = grep /\S/s, split /^From .[^\n]+\n/sm, $lei_out;
+        my @eml = map { PublicInbox::Eml->new($_) } @msgs;
+        my $h = 'Subject';
+        @eml = sort { $a->header_raw($h) cmp $b->header_raw($h) } @eml;
+        my @has = map { scalar $_->header_raw($h) } @eml;
+        is_xdeeply \@has,
+                [ 'replied a', 'replied b', 'unseen', 'unseen flagged' ],
+                'subjects sorted';
+        $h = 'X-Status';
+        @has = map { scalar $_->header_raw($h) } @eml;
+        is_xdeeply \@has, [ 'A', 'A', undef, 'F' ], 'answered and flagged kw';
+        $h = 'Status';
+        @has = map { scalar $_->header_raw($h) } @eml;
+        is_xdeeply \@has, ['RO', 'RO', 'O', 'O'], 'read and old';
+        lei_ok qw(import +L:normal), $normal;
+        lei_ok qw(q L:normal -f mboxrd);
+        @msgs = grep /\S/s, split /^From .[^\n]+\n/sm, $lei_out;
+        my @eml2 = map { PublicInbox::Eml->new($_) } @msgs;
+        $h = 'Subject';
+        @eml2 = sort { $a->header_raw($h) cmp $b->header_raw($h) } @eml2;
+        is_xdeeply \@eml2, \@eml, 'import preserved kw';
+
+        lei_ok 'ls-mail-sync';
+        is $lei_out, 'mh:'.File::Spec->rel2abs($normal)."\n",
+                'mail sync stored';
+
+        lei_ok qw(convert -s size -f mboxrd), "mh:$for_sort";
+        chomp(my @s = grep /^Subject:/, split(/^/sm, $lei_out));
+        s/^Subject: // for @s;
+        is_xdeeply \@s, [ 1, 22, 333 ], 'sorted by size';
+
+        for my $s ([], [ 'name' ], [ 'sequence' ]) {
+                lei_ok qw(convert -f mboxrd), "mh:$for_sort", '-s', @$s;
+                chomp(@s = grep /^Subject:/, split(/^/sm, $lei_out));
+                s/^Subject: // for @s;
+                my $desc = "@$s" || '(default)';
+                is_xdeeply \@s, [ 333, 22, 1 ], "sorted by: $desc";
+        }
+
+        lei_ok qw(import +L:sorttest), "MH:$for_sort";
+        lei_ok 'ls-mail-sync', $for_sort;
+        is $lei_out, 'mh:'.File::Spec->rel2abs($for_sort)."\n",
+                "mail sync stored with `MH' normalized to `mh'";
+        lei_ok qw(index), 'mh:'.$stale;
+        lei qw(q -f mboxrd), 's:msg 4';
+        like $lei_out, qr/^Subject: msg 4\nStatus: RO\n\n\n/ms,
+                "message retrieved after `lei index'"
+});
+
+done_testing;