From 34f4c437bff62a3297b2fcebf31fcdb24638dec9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 7 Jan 2019 08:02:13 +0000 Subject: hoist out resolve_repo_dir from -index We'll be using it in future admin tools, and making this easier-to-test. --- MANIFEST | 2 ++ lib/PublicInbox/Admin.pm | 44 +++++++++++++++++++++++++ script/public-inbox-index | 32 ++----------------- t/admin.t | 81 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 129 insertions(+), 30 deletions(-) create mode 100644 lib/PublicInbox/Admin.pm create mode 100644 t/admin.t diff --git a/MANIFEST b/MANIFEST index e4f3df86..5ac85c3f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -51,6 +51,7 @@ examples/unsubscribe.milter examples/unsubscribe.psgi examples/varnish-4.vcl lib/PublicInbox/Address.pm +lib/PublicInbox/Admin.pm lib/PublicInbox/AltId.pm lib/PublicInbox/Config.pm lib/PublicInbox/ContentId.pm @@ -141,6 +142,7 @@ scripts/slrnspool2maildir scripts/ssoma-replay scripts/xhdr-num2mid t/address.t +t/admin.t t/altid.t t/altid_v2.t t/cgi.t diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm new file mode 100644 index 00000000..d0a8dd00 --- /dev/null +++ b/lib/PublicInbox/Admin.pm @@ -0,0 +1,44 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ + +# common stuff for administrative command-line tools +# Unstable internal API +package PublicInbox::Admin; +use strict; +use warnings; +use Cwd 'abs_path'; +use base qw(Exporter); +our @EXPORT_OK = qw(resolve_repo_dir); + +sub resolve_repo_dir { + my ($cd, $ver) = @_; + my $prefix = defined $cd ? $cd : './'; + if (-d $prefix && -f "$prefix/inbox.lock") { # v2 + $$ver = 2 if $ver; + return abs_path($prefix); + } + + my @cmd = qw(git rev-parse --git-dir); + my $cmd = join(' ', @cmd); + my $pid = open my $fh, '-|'; + defined $pid or die "forking $cmd failed: $!\n"; + if ($pid == 0) { + if (defined $cd) { + chdir $cd or die "chdir $cd failed: $!\n"; + } + exec @cmd; + die "Failed to exec $cmd: $!\n"; + } else { + my $dir = eval { + local $/; + <$fh>; + }; + close $fh or die "error in $cmd: $!\n"; + chomp $dir; + $$ver = 1 if $ver; + return abs_path($cd) if ($dir eq '.' && defined $cd); + abs_path($dir); + } +} + +1; diff --git a/script/public-inbox-index b/script/public-inbox-index index 73ad9bc4..32121f6d 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -9,9 +9,10 @@ use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -use Cwd 'abs_path'; my $usage = "public-inbox-index REPO_DIR"; use PublicInbox::Config; +use PublicInbox::Admin qw(resolve_repo_dir); + my $config = eval { PublicInbox::Config->new } || eval { warn "public-inbox unconfigured for serving, indexing anyways...\n"; {} @@ -35,35 +36,6 @@ die "--jobs must be positive\n" if defined $jobs && $jobs < 0; my @dirs; -sub resolve_repo_dir { - my ($cd) = @_; - my $prefix = defined $cd ? $cd : './'; - if (-d $prefix && -f "$prefix/inbox.lock") { # v2 - return abs_path($prefix); - } - - my @cmd = qw(git rev-parse --git-dir); - my $cmd = join(' ', @cmd); - my $pid = open my $fh, '-|'; - defined $pid or die "forking $cmd failed: $!\n"; - if ($pid == 0) { - if (defined $cd) { - chdir $cd or die "chdir $cd failed: $!\n"; - } - exec @cmd; - die "Failed to exec $cmd: $!\n"; - } else { - my $dir = eval { - local $/; - <$fh>; - }; - close $fh or die "error in $cmd: $!\n"; - chomp $dir; - return abs_path($cd) if ($dir eq '.' && defined $cd); - abs_path($dir); - } -} - if (@ARGV) { @dirs = map { resolve_repo_dir($_) } @ARGV; } else { diff --git a/t/admin.t b/t/admin.t new file mode 100644 index 00000000..cc1e65d1 --- /dev/null +++ b/t/admin.t @@ -0,0 +1,81 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw(tempdir); +# use Cwd qw(getcwd); +use_ok 'PublicInbox::Admin', qw(resolve_repo_dir); +my $tmpdir = tempdir('pi-admin.XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $git_dir = "$tmpdir/v1"; +my $v2_dir = "$tmpdir/v2"; +my ($res, $err, $v); + +is(0, system(qw(git init -q --bare), $git_dir), 'git init v1'); + +# v1 +is(resolve_repo_dir($git_dir), $git_dir, 'top-level GIT_DIR resolved'); +is(resolve_repo_dir("$git_dir/objects"), $git_dir, 'GIT_DIR/objects resolved'); + +ok(chdir($git_dir), 'chdir GIT_DIR works'); +is(resolve_repo_dir(), $git_dir, 'resolve_repo_dir works in GIT_DIR'); + +ok(chdir("$git_dir/objects"), 'chdir GIT_DIR/objects works'); +is(resolve_repo_dir(), $git_dir, 'resolve_repo_dir works in GIT_DIR'); +$res = resolve_repo_dir(undef, \$v); +is($v, 1, 'version 1 detected'); +is($res, $git_dir, 'detects directory along with version'); + +# $tmpdir could be inside a git working, directory, so we test '/' +SKIP: { + my $no_vcs_dir = '/'; + # do people version-control "/"? + skip "$no_vcs_dir is version controlled by git", 4 if -d '/.git'; + open my $null, '>', '/dev/null' or die "open /dev/null: $!"; + open my $olderr, '>&', \*STDERR or die "dup stderr: $!"; + + ok(chdir($no_vcs_dir), 'chdir to a non-inbox'); + open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!"; + $res = eval { resolve_repo_dir() }; + open STDERR, '>&', $olderr or die "restore stderr: $!"; + is($res, undef, 'fails inside non-version-controlled dir'); + + ok(chdir($tmpdir), 'back to test-specific $tmpdir'); + open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!"; + $res = eval { resolve_repo_dir($no_vcs_dir) }; + $err = $@; + open STDERR, '>&', $olderr or die "restore stderr: $!"; + is($res, undef, 'fails on non-version-controlled dir'); + ok($err, '$@ set on failure'); +} + +# v2 +SKIP: { + for my $m (qw(DBD::SQLite Search::Xapian)) { + skip "$m missing", 5 unless eval "require $m"; + } + use_ok 'PublicInbox::V2Writable'; + use_ok 'PublicInbox::Inbox'; + my $ibx = PublicInbox::Inbox->new({ + mainrepo => $v2_dir, + name => 'test-v2writable', + version => 2, + -primary_address => 'test@example.com', + indexlevel => 'basic', + }); + PublicInbox::V2Writable->new($ibx, 1)->idx_init; + + ok(-e "$v2_dir/inbox.lock", 'exists'); + is(resolve_repo_dir($v2_dir), $v2_dir, + 'resolve_repo_dir works on v2_dir'); + ok(chdir($v2_dir), 'chdir v2_dir OK'); + is(resolve_repo_dir(), $v2_dir, 'resolve_repo_dir works inside v2_dir'); + $res = resolve_repo_dir(undef, \$v); + is($v, 2, 'version 2 detected'); + is($res, $v2_dir, 'detects directory along with version'); + + # TODO: should work from inside Xapian dirs, and git dirs, here... +} + +chdir '/'; +done_testing(); -- cgit v1.2.3-24-ge0c7 From 9739f3aa67425958f306f8418c0c6fbf186481c3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 11 Jan 2019 03:50:51 +0000 Subject: import: purge: reap fast-export process Zombies are bad. --- lib/PublicInbox/Import.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index fd4255cf..7e596abc 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -495,7 +495,7 @@ sub purge_oids { my $old = $self->{'ref'}; my $git = $self->{git}; my @export = (qw(fast-export --no-data --use-done-feature), $old); - my ($rd, $pid) = $git->popen(@export); + my $rd = $git->popen(@export); my ($r, $w) = $self->gfi_start; my @buf; my $npurge = 0; @@ -550,6 +550,7 @@ sub purge_oids { push @buf, $_; } } + close $rd or die "close fast-export failed: $?"; if (@buf) { $w->print(@buf) or wfail; } -- cgit v1.2.3-24-ge0c7 From a6823087e5aa9699a42c31df16323885c70d1b4d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 11 Jan 2019 00:59:51 +0000 Subject: v2writable: ->purge returns undef on no-op And doesn't try to access undef as an array ref. --- lib/PublicInbox/V2Writable.pm | 2 +- t/v2writable.t | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 222df5c2..08d18fc0 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -390,7 +390,7 @@ sub purge { my ($self, $mime) = @_; my $purges = $self->{-inbox}->with_umask(sub { remove_internal($self, $mime, undef, {}); - }); + }) or return; $self->idx_init if @$purges; # ->done is called on purges for my $i (0..$#$purges) { defined(my $cmt = $purges->[$i]) or next; diff --git a/t/v2writable.t b/t/v2writable.t index ec9f56d9..f1714175 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -247,6 +247,9 @@ EOF ok(my $cmts = $im->purge($mime), 'purged message'); like($cmts->[0], qr/\A[a-f0-9]{40}\z/, 'purge returned current commit'); $im->done; + + # again + is($im->purge($mime), undef, 'no-op returns undef'); } { -- cgit v1.2.3-24-ge0c7 From ec4f98cb7ad6f6baf19ca208947fcb563a4ed8fe Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 11 Jan 2019 01:02:45 +0000 Subject: v2writable: purge ignores non-existent git epoch directories We don't require every git epoch to exist since we support the --skip feature in public-inbox-init. --- lib/PublicInbox/V2Writable.pm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 08d18fc0..970244eb 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -286,7 +286,9 @@ sub purge_oids { my $pfx = "$self->{-inbox}->{mainrepo}/git"; my $purges = []; foreach my $i (0..$self->{epoch_max}) { - my $git = PublicInbox::Git->new("$pfx/$i.git"); + my $git_dir = "$pfx/$i.git"; + -d $git_dir or next; + my $git = PublicInbox::Git->new($git_dir); my $im = $self->import_init($git, 0, 1); $purges->[$i] = $im->purge_oids($purge); } -- cgit v1.2.3-24-ge0c7 From 5126394a3806bd438bd0ef9c596ffa2e3cfb8db9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 11 Jan 2019 02:47:57 +0000 Subject: v2writable: cleanup processes when done Otherwise, Perl may exit successfully when a failure code is desired. --- lib/PublicInbox/V2Writable.pm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 970244eb..ec28e510 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -291,6 +291,7 @@ sub purge_oids { my $git = PublicInbox::Git->new($git_dir); my $im = $self->import_init($git, 0, 1); $purges->[$i] = $im->purge_oids($purge); + $im->done; } $purges; } @@ -499,6 +500,7 @@ sub done { delete $self->{bnote}; $self->{transact_bytes} = 0; $self->lock_release if $parts; + $self->{-inbox}->git->cleanup; } sub git_init { -- cgit v1.2.3-24-ge0c7 From 995849ed8a5a6f96105263a546e55e6c8811e76e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 11 Jan 2019 02:48:18 +0000 Subject: v2writable: read epoch on purge A stand-alone tool for purge will won't know the epoch if nothing was ->add()-ed before. --- lib/PublicInbox/V2Writable.pm | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index ec28e510..1f17fe21 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -285,7 +285,13 @@ sub purge_oids { $self->done; my $pfx = "$self->{-inbox}->{mainrepo}/git"; my $purges = []; - foreach my $i (0..$self->{epoch_max}) { + my $max = $self->{epoch_max}; + + unless (defined($max)) { + defined(my $latest = git_dir_latest($self, \$max)) or return; + $self->{epoch_max} = $max; + } + foreach my $i (0..$max) { my $git_dir = "$pfx/$i.git"; -d $git_dir or next; my $git = PublicInbox::Git->new($git_dir); -- cgit v1.2.3-24-ge0c7 From 440b0feaa209e12e4bcb8ef16a95041fce71e7dc Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 10 Jan 2019 21:14:27 +0000 Subject: implement public-inbox-purge tool Expose the ->purge functionality of V2Writable for rewriting git history to permanently purge messages from history. This may be necessary for legal reasons. Usage: # requires ~/.public-inbox/config public-inbox-purge --all +# License: AGPL-3.0+ +# +# Used for purging messages entirely from a public-inbox. Currently +# supports v2 inboxes only, for now. +use strict; +use warnings; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +use PublicInbox::Config; +use PublicInbox::MIME; +use PublicInbox::Admin qw(resolve_repo_dir); +use PublicInbox::Filter::Base; +*REJECT = *PublicInbox::Filter::Base::REJECT; + +my $usage = "$0 [--all] [INBOX_DIRS] new }; +my $cfgfile = PublicInbox::Config::default_file(); +my ($all, $force); +my $verbose = 1; +my %opts = ( + 'all' => \$all, + 'force|f' => \$force, + 'verbose|v!' => \$verbose, +); +GetOptions(%opts) or die "bad command-line args\n", $usage, "\n"; + +# TODO: clean this up and share code with -index via ::Admin +my %dir2ibx; # ( path => Inbox object ) +my @inboxes; +$config and $config->each_inbox(sub { + my ($ibx) = @_; + push @inboxes, $ibx if $all && $ibx->{version} != 1; + $dir2ibx{$ibx->{mainrepo}} = $ibx; +}); + +if ($all) { + $config or die "--all specified, but $cfgfile not readable\n"; + @ARGV and die "--all specified, but directories specified\n"; +} else { + my @err; + my @dirs = scalar(@ARGV) ? @ARGV : ('.'); + my $u = 0; + + foreach my $dir (@dirs) { + my $v; + my $dir = resolve_repo_dir($dir, \$v); + if ($v == 1) { + push @err, $dir; + next; + } + my $ibx = $dir2ibx{$dir} ||= do { + warn "$dir not configured in $cfgfile\n"; + $u++; + my $name = "unconfigured-$u"; + PublicInbox::Inbox->new({ + version => 2, + name => $name, + -primary_address => "$name\@example.com", + mainrepo => $dir, + }); + }; + push @inboxes, $ibx; + } + + if (@err) { + die "v1 inboxes currently not supported by -purge\n\t", + join("\n\t", @err), "\n"; + } +} + +my $data = do { local $/; scalar }; +$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $n_purged = 0; + +foreach my $ibx (@inboxes) { + my $mime = PublicInbox::MIME->new($data); + my $v2w = PublicInbox::V2Writable->new($ibx, 0); + + my $commits = $v2w->purge($mime) || []; + + if (my $scrub = $ibx->filter($v2w)) { + my $scrubbed = $scrub->scrub($mime, 1); + + if ($scrubbed && $scrubbed != REJECT()) { + my $scrub_commits = $v2w->purge($scrubbed); + push @$commits, @$scrub_commits if $scrub_commits; + } + } + + $v2w->done; + + if ($verbose) { # should we consider this machine-parseable? + print "$ibx->{mainrepo}:"; + if (scalar @$commits) { + print join("\n\t", '', @$commits), "\n"; + } else { + print " NONE\n"; + } + } + $n_purged += scalar @$commits; +} + +# behave like "rm -f" +exit(0) if ($force || $n_purged); + +warn "Not found\n" if $verbose; +exit(1); diff --git a/t/purge.t b/t/purge.t new file mode 100644 index 00000000..94060055 --- /dev/null +++ b/t/purge.t @@ -0,0 +1,97 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +require './t/common.perl'; +require_git(2.6); +my @mods = qw(IPC::Run DBI DBD::SQLite Search::Xapian); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "missing $_ for t/purge.t" if $@; +}; +use Cwd qw(abs_path); +my $purge = abs_path('blib/script/public-inbox-purge'); +my $tmpdir = tempdir('pi-purge-XXXXXX', TMPDIR => 1, CLEANUP => 1); +use_ok 'PublicInbox::V2Writable'; +my $mainrepo = "$tmpdir/v2"; +my $ibx = PublicInbox::Inbox->new({ + mainrepo => $mainrepo, + name => 'test-v2purge', + version => 2, + -primary_address => 'test@example.com', + indexlevel => 'basic', +}); + +my $raw = <<'EOF'; +From: a@example.com +To: test@example.com +Subject: this is a subject +Message-ID: +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +Hello World + +EOF + +local $ENV{NPROC} = '1'; +my $cfgfile = "$tmpdir/config"; +local $ENV{PI_CONFIG} = $cfgfile; +open my $cfg_fh, '>', $cfgfile or die "open: $!"; + +my $v2w = PublicInbox::V2Writable->new($ibx, 1); +my $mime = PublicInbox::MIME->new($raw); +ok($v2w->add($mime), 'add message to be purged'); +$v2w->done; + +# failing cases, first: +my $in = "$raw\nMOAR\n"; +my ($out, $err) = ('', ''); +ok(IPC::Run::run([$purge, '-f', $mainrepo], \$in, \$out, \$err), + 'purge -f OK'); + +$out = $err = ''; +ok(!IPC::Run::run([$purge, $mainrepo], \$in, \$out, \$err), + 'mismatch fails without -f'); +is($? >> 8, 1, 'missed purge exits with 1'); + +# a successful case: +ok(IPC::Run::run([$purge, $mainrepo], \$raw, \$out, \$err), 'match OK'); +like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted'); + +# add (old) vger filter to config file +print $cfg_fh <add($mime), 'add vger-signatured message to be purged'); +$v2w->done; + +my $pre_scrub = $raw . <<'EOF'; + +-- +To unsubscribe from this list: send the line "unsubscribe linux-kernel" in +the body of a message to majordomo@vger.kernel.org +More majordomo info at http://vger.kernel.org/majordomo-info.html +Please read the FAQ at http://www.tux.org/lkml/ +EOF + +$out = $err = ''; +ok(chdir('/'), "chdir / OK for --all test"); +ok(IPC::Run::run([$purge, '--all'], \$pre_scrub, \$out, \$err), + 'scrub purge OK'); +like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted'); +# diag "out: $out"; diag "err: $err"; + +$out = $err = ''; +ok(!IPC::Run::run([$purge, '--all' ], \$pre_scrub, \$out, \$err), + 'scrub purge not idempotent without -f'); +# diag "out: $out"; diag "err: $err"; + +done_testing(); -- cgit v1.2.3-24-ge0c7