diff options
author | Eric Wong <e@80x24.org> | 2019-01-10 21:14:27 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2019-01-11 04:07:17 +0000 |
commit | 440b0feaa209e12e4bcb8ef16a95041fce71e7dc (patch) | |
tree | 493c947adb8a12aa5e8ab24984eafa81120b167a /script | |
parent | 995849ed8a5a6f96105263a546e55e6c8811e76e (diff) | |
download | public-inbox-440b0feaa209e12e4bcb8ef16a95041fce71e7dc.tar.gz |
Expose the ->purge functionality of V2Writable for rewriting git history to permanently purge messages from history. This may be necessary for legal reasons.

Usage:

    # requires ~/.public-inbox/config
    public-inbox-purge --all </path/to/message-to-purge

    # good for testing with unconfigured inboxes:
    public-inbox-purge $INBOX_DIR </path/to/message-to-purge
Diffstat (limited to 'script')
-rwxr-xr-x | script/public-inbox-purge | 111 |
1 files changed, 111 insertions, 0 deletions
#!/usr/bin/perl -w
# Copyright (C) 2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used for purging messages entirely from a public-inbox.  Currently
# supports v2 inboxes only, for now.
#
# The message to purge is read from standard input; an optional leading
# mbox-style "From " line is stripped before it is parsed.  Every selected
# inbox is purged of the message as given and, when the inbox provides a
# filter, of that filter's scrubbed form of the message as well.
#
# Inbox selection:
#   --all        every non-v1 inbox found in the config file
#   INBOX_DIRS   the given directories (default: the current directory);
#                directories absent from the config get a placeholder inbox
#
# Exit status: 0 when ->purge reported at least one result or --force was
# given (like "rm -f"), 1 otherwise.
use strict;
use warnings;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
use PublicInbox::Config;
use PublicInbox::MIME;
use PublicInbox::Admin qw(resolve_repo_dir);
use PublicInbox::Filter::Base;
*REJECT = *PublicInbox::Filter::Base::REJECT;

my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message";

eval { require PublicInbox::V2Writable } or die
    "DBI, DBD::SQLite and Search::Xapian required for purge\n";

# A missing/unreadable config is tolerated here; it only becomes fatal
# below when --all is requested.
my $config = eval { PublicInbox::Config->new };
my $cfgfile = PublicInbox::Config::default_file();
my ($all, $force);
my $verbose = 1;
my %opts = (
    'all' => \$all,
    'force|f' => \$force,
    'verbose|v!' => \$verbose,
);
GetOptions(%opts) or die "bad command-line args\n", $usage, "\n";

# TODO: clean this up and share code with -index via ::Admin
my %dir2ibx; # ( path => Inbox object )
my @inboxes;
$config and $config->each_inbox(sub {
    my ($ibx) = @_;

    # The version slot may be unset; comparing undef with != warns and
    # evaluates as "not 1", which would queue the inbox for a v2 purge.
    # NOTE(review): assumes an inbox without an explicit version is v1
    # (unsupported here) -- confirm against PublicInbox::Config.
    my $version = $ibx->{version} || 1;
    push @inboxes, $ibx if $all && $version != 1;
    $dir2ibx{$ibx->{mainrepo}} = $ibx;
});

if ($all) {
    $config or die "--all specified, but $cfgfile not readable\n";
    @ARGV and die "--all specified, but directories specified\n";
} else {
    my @err;
    my @dirs = scalar(@ARGV) ? @ARGV : ('.');
    my $u = 0;

    foreach my $arg (@dirs) {
        my $v;

        # resolve_repo_dir reports the inbox version through \$v
        my $dir = resolve_repo_dir($arg, \$v);
        if ($v == 1) {
            push @err, $dir;
            next;
        }

        # Fall back to a throw-away Inbox object for directories the
        # config does not know about (handy for testing).
        my $ibx = $dir2ibx{$dir} ||= do {
            warn "$dir not configured in $cfgfile\n";
            $u++;
            my $name = "unconfigured-$u";
            PublicInbox::Inbox->new({
                version => 2,
                name => $name,
                -primary_address => "$name\@example.com",
                mainrepo => $dir,
            });
        };
        push @inboxes, $ibx;
    }

    # Report every v1 directory at once instead of dying on the first.
    if (@err) {
        die "v1 inboxes currently not supported by -purge\n\t",
            join("\n\t", @err), "\n";
    }
}

# Slurp the whole message from stdin.
my $data = do { local $/; scalar <STDIN> };
defined $data or die "failed to read message from stdin\n";

# Drop a leading mbox "From " separator line, if present.
$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
my $n_purged = 0;

foreach my $ibx (@inboxes) {
    # Parse a fresh copy per inbox; the filter below may alter it.
    my $mime = PublicInbox::MIME->new($data);
    my $v2w = PublicInbox::V2Writable->new($ibx, 0);

    my $commits = $v2w->purge($mime) || [];

    # Also purge the scrubbed form of the message, when the inbox has
    # a filter and it does not reject the message.
    if (my $scrub = $ibx->filter($v2w)) {
        my $scrubbed = $scrub->scrub($mime, 1);

        if ($scrubbed && $scrubbed != REJECT()) {
            my $scrub_commits = $v2w->purge($scrubbed);
            push @$commits, @$scrub_commits if $scrub_commits;
        }
    }

    $v2w->done;

    if ($verbose) { # should we consider this machine-parseable?
        print "$ibx->{mainrepo}:";
        if (scalar @$commits) {
            print join("\n\t", '', @$commits), "\n";
        } else {
            print " NONE\n";
        }
    }
    $n_purged += scalar @$commits;
}

# behave like "rm -f"
exit(0) if ($force || $n_purged);

warn "Not found\n" if $verbose;
exit(1);