From 9fcce78e40b0a7c61797be2aff6c5afeb474568e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 20 Dec 2020 06:30:11 +0000 Subject: script/public-inbox-*: favor caller-provided pathnames We'll try to avoid calling Cwd::abs_path and use File::Spec->rel2abs instead, since abs_path will resolve symlinks the user specified on the command-line. Unfortunately, ->rel2abs still leaves "/.." and "/../" uncollapsed, so we still need to fall back to Cwd::abs_path in those cases. While we are at it, we'll also resolve inboxdir from deep inside v2 directories instead of misdetecting them as v1 bare git repos. In any case, stop matching directories by name and instead rely on the unique combination of st_dev + st_ino on stat() as we started doing in the extindex code. --- script/public-inbox-convert | 27 +++++++-------------------- script/public-inbox-init | 10 ++-------- 2 files changed, 9 insertions(+), 28 deletions(-) (limited to 'script') diff --git a/script/public-inbox-convert b/script/public-inbox-convert index b61c743f..fbd527a6 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -47,34 +47,21 @@ die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq ''); die "$new_dir exists\n" if -d $new_dir; die "$old_dir not a directory\n" unless -d $old_dir; -require Cwd; -Cwd->import('abs_path'); +require PublicInbox::Admin; require PublicInbox::Config; require PublicInbox::InboxWritable; -my $abs = abs_path($old_dir); -die "failed to resolve $old_dir: $!\n" if (!defined($abs)); - my $cfg = PublicInbox::Config->new; -my $old; -$cfg->each_inbox(sub { - $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir; -}); -if ($old) { - $old = PublicInbox::InboxWritable->new($old); -} else { +my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg); +@old > 1 and die "BUG: resolved several inboxes from $old_dir:\n", + map { "\t$_->{inboxdir}\n" } @old; +my $old = PublicInbox::InboxWritable->new($old[0]); +if (delete $old->{-unconfigured}) { 
warn "W: $old_dir not configured in " . PublicInbox::Config::default_file() . "\n"; - $old = PublicInbox::InboxWritable->new({ - inboxdir => $old_dir, - name => 'ignored', - -primary_address => 'old@example.com', - address => [ 'old@example.com' ], - }); } die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2; -require File::Spec; require PublicInbox::Admin; my $detected = PublicInbox::Admin::detect_indexlevel($old); $old->{indexlevel} //= $detected; @@ -88,7 +75,7 @@ if ($opt->{'index'}) { } local %ENV = (%$env, %ENV) if $env; my $new = { %$old }; -$new->{inboxdir} = File::Spec->canonpath($new_dir); +$new->{inboxdir} = PublicInbox::Admin::rel2abs_collapsed($new_dir); $new->{version} = 2; $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); $new->{-no_fsync} = 1 if !$opt->{fsync}; diff --git a/script/public-inbox-init b/script/public-inbox-init index c775eb31..eb605a51 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -138,10 +138,9 @@ close($fh) or die "failed to close $pi_config_tmp: $!\n"; my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); -require File::Spec; -$inboxdir = File::Spec->canonpath($inboxdir); +PublicInbox::Admin::rel2abs_collapsed($inboxdir); +die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0; -die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; if (-f "$inboxdir/inbox.lock") { if (!defined $version) { $version = 2; @@ -186,11 +185,6 @@ if ($skip_docdata) { $ibx->{-skip_docdata} = $skip_docdata; } $ibx->init_inbox(0, $skip_epoch, $skip_artnum); -require Cwd; -my $tmp = Cwd::abs_path($inboxdir); -defined($tmp) or die "failed to resolve $inboxdir: $!\n"; -$inboxdir = $tmp; -die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; # needed for git prior to v2.1.0 umask(0077) if defined $perm; -- cgit v1.2.3-24-ge0c7 From 3e9888ed30b7fe092b03789d19a8020d4bc0fb39 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Dec 
2020 07:51:20 +0000 Subject: use rel2abs_collapsed when loading Inbox objects We need to canonicalize paths for inboxes which do not have a newsgroup defined, otherwise ->eidx_key matches can fail in unexpected ways. --- script/public-inbox-convert | 2 +- script/public-inbox-init | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'script') diff --git a/script/public-inbox-convert b/script/public-inbox-convert index fbd527a6..800c364c 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -75,7 +75,7 @@ if ($opt->{'index'}) { } local %ENV = (%$env, %ENV) if $env; my $new = { %$old }; -$new->{inboxdir} = PublicInbox::Admin::rel2abs_collapsed($new_dir); +$new->{inboxdir} = $cfg->rel2abs_collapsed($new_dir); $new->{version} = 2; $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); $new->{-no_fsync} = 1 if !$opt->{fsync}; diff --git a/script/public-inbox-init b/script/public-inbox-init index eb605a51..afaa4c12 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -138,7 +138,7 @@ close($fh) or die "failed to close $pi_config_tmp: $!\n"; my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); -PublicInbox::Admin::rel2abs_collapsed($inboxdir); +PublicInbox::Config::rel2abs_collapsed($inboxdir); die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0; if (-f "$inboxdir/inbox.lock") { -- cgit v1.2.3-24-ge0c7 From d6d3159d1ae25f67c09dd189e6df36795a3b8bfa Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 23 Dec 2020 23:02:55 +0000 Subject: index: update [extindex "all"] by default, support -E In most cases, this ensures users will only have to opt-in to using -extindex once and won't have to issue extra commands to keep external indices up-to-date when using public-inbox-index. Since we support arbitrary numbers of external indices for ease-of-development, we'll support repeating "-E" ("--update-extindex=") in case users want to test changes in parallel. 
--- script/public-inbox-index | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'script') diff --git a/script/public-inbox-index b/script/public-inbox-index index 8a61817c..f10bb5ad 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -17,7 +17,7 @@ options: --no-fsync speed up indexing, risk corruption on power outage -L LEVEL `basic', `medium', or `full' (default: full) - -E EIDX update EIDX (e.g. `all') + -E EXTINDEX update extindex (default: `all') --all index all configured inboxes --compact | -c run public-inbox-compact(1) after indexing --sequential-shard index Xapian shards sequentially for slow storage @@ -32,12 +32,16 @@ options: BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) See public-inbox-index(1) man page for full documentation. EOF -my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 }; +my $opt = { + quiet => -1, compact => 0, max_size => undef, fsync => 1, + 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given +}; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune fsync|sync! 
xapian_only|xapian-only indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s sequential_shard|seq-shard|sequential-shard + no-update-extindex update-extindex|E=s@ skip-docdata all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; @@ -56,7 +60,31 @@ my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die('-index'); unless (@ibxs) { print STDERR $help; exit 1 } +my (@eidx_dir, %eidx_seen); +my $update_extindex = $opt->{'update-extindex'}; +if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) { + # extindex and normal inboxes may have different owners + push(@$update_extindex, 'all') if -w $ALL->{topdir}; +} +@$update_extindex = () if $opt->{'no-update-extindex'}; +if (scalar @$update_extindex) { + PublicInbox::Admin::require_or_die('-search'); + require PublicInbox::ExtSearchIdx; +} +for my $ei_name (@$update_extindex) { + my $es = $cfg->lookup_ei($ei_name); + my $topdir; + if (!$es && -d $ei_name) { # allow dirname or config section name + $topdir = $ei_name; + } elsif ($es) { + $topdir = $es->{topdir}; + } else { + die "extindex `$ei_name' not configured or found\n"; + } + $eidx_seen{$topdir} //= push(@eidx_dir, $topdir); +} my $mods = {}; +my @eidx_unconfigured; foreach my $ibx (@ibxs) { # detect_indexlevel may also set $ibx->{-skip_docdata} my $detected = PublicInbox::Admin::detect_indexlevel($ibx); @@ -64,7 +92,14 @@ foreach my $ibx (@ibxs) { $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 
'full' : $detected); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); + if (@eidx_dir && $ibx->{-unconfigured}) { + push @eidx_unconfigured, " $ibx->{inboxdir}\n"; + } } +warn <{compact} = 0 if !$mods->{'Search::Xapian'}; @@ -96,4 +131,12 @@ EOL local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard}; PublicInbox::Xapcmd::run($ibx, 'compact', $copt); } + next if $ibx->{-unconfigured}; + last if $ibx_opt->{quit}; + for my $dir (@eidx_dir) { + my $eidx = PublicInbox::ExtSearchIdx->new($dir); + $eidx->attach_inbox($ibx); + $eidx->eidx_sync($ibx_opt); + last if $ibx_opt->{quit}; + } } -- cgit v1.2.3-24-ge0c7 From b3cf37096874c6c80ef554e5153e5d995c72ab95 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 24 Dec 2020 10:09:18 +0000 Subject: inboxwritable: delay umask_prepare calls This simplifies all ->with_umask callers and opens the door for further optimizations to delay/elide process spawning. --- script/public-inbox-convert | 1 - 1 file changed, 1 deletion(-) (limited to 'script') diff --git a/script/public-inbox-convert b/script/public-inbox-convert index 800c364c..e6ee6529 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -80,7 +80,6 @@ $new->{version} = 2; $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); $new->{-no_fsync} = 1 if !$opt->{fsync}; my $v2w; -$old->umask_prepare; sub link_or_copy ($$) { my ($src, $dst) = @_; -- cgit v1.2.3-24-ge0c7 From e8d6c34c749d1b0fd1dc1278cd4a2a310b31a9ac Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 24 Dec 2020 10:09:19 +0000 Subject: index: support --fast-noop / -F switch Note: I'm not sure if it's worth documenting and supporting this long-term. We can avoid taking locks for invocations of "index --all" and rely on high-resolution ctime (struct timespec st_ctim) comparisons of msgmap.sqlite3 and the packed-refs + refs/heads directory of the newest epoch. This cuts public-inbox-index invocations with "--all --no-update-extindex -L basic" down from 0.92s to 0.31s.
The change with "-L medium" or "-L full" and (default) non-zero jobs is even more drastic, reducing a 12-13s no-op invocation down to the same 0.31s --- script/public-inbox-index | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'script') diff --git a/script/public-inbox-index b/script/public-inbox-index index f10bb5ad..91afac88 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -42,7 +42,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune batch_size|batch-size=s sequential_shard|seq-shard|sequential-shard no-update-extindex update-extindex|E=s@ - skip-docdata all help|h)) + fast-noop|F skip-docdata all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; -- cgit v1.2.3-24-ge0c7 From b2e8536cd607d71182d6228c629bca12017ce34c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 01:44:38 +0000 Subject: init: use the return value of rel2abs_collapsed :x Fixes: 9fcce78e40b0a7c6 ("script/public-inbox-*: favor caller-provided pathnames") --- script/public-inbox-init | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'script') diff --git a/script/public-inbox-init b/script/public-inbox-init index afaa4c12..6d538e43 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -138,7 +138,7 @@ close($fh) or die "failed to close $pi_config_tmp: $!\n"; my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); -PublicInbox::Config::rel2abs_collapsed($inboxdir); +$inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir); die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0; if (-f "$inboxdir/inbox.lock") { -- cgit v1.2.3-24-ge0c7 From 4af931f9ad100b0eca5729e2b1c56b844cf1a1c8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Dec 2020 10:21:09 +0000 Subject: index: disable --fast-noop on --reindex These options make no sense when used together, 
just inform the user and move on since it's probably harmless to continue. --- script/public-inbox-index | 3 +++ 1 file changed, 3 insertions(+) (limited to 'script') diff --git a/script/public-inbox-index b/script/public-inbox-index index 91afac88..87893ef1 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -49,6 +49,9 @@ die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; if ($opt->{xapian_only} && !$opt->{reindex}) { die "--xapian-only requires --reindex\n"; } +if ($opt->{reindex} && delete($opt->{'fast-noop'})) { + warn "--fast-noop ignored with --reindex\n"; +} # require lazily to speed up --help require PublicInbox::Admin; -- cgit v1.2.3-24-ge0c7 From fb4dd7fdeeed8478cda9b7e63e56564da8cbdacf Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Dec 2020 10:21:12 +0000 Subject: index: do not attach inbox to extindex unless updated We'll count the number of log changes (regardless of index or unindex) and only attach inboxes to ExtSearchIdx objects when they get new work. We'll also reduce lock bouncing and only update external indices after all per-inbox indexing is done. This also updates existing v2 indexing/unindexing callers to be more consistent and ensures unindex log entries update per-inbox last commit information. 
--- script/public-inbox-index | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'script') diff --git a/script/public-inbox-index b/script/public-inbox-index index 87893ef1..a17bf615 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -63,7 +63,7 @@ my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die('-index'); unless (@ibxs) { print STDERR $help; exit 1 } -my (@eidx_dir, %eidx_seen); +my (@eidx, %eidx_seen); my $update_extindex = $opt->{'update-extindex'}; if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) { # extindex and normal inboxes may have different owners @@ -84,7 +84,8 @@ for my $ei_name (@$update_extindex) { } else { die "extindex `$ei_name' not configured or found\n"; } - $eidx_seen{$topdir} //= push(@eidx_dir, $topdir); + $eidx_seen{$topdir} //= + push(@eidx, PublicInbox::ExtSearchIdx->new($topdir)); } my $mods = {}; my @eidx_unconfigured; @@ -95,7 +96,7 @@ foreach my $ibx (@ibxs) { $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 
'full' : $detected); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); - if (@eidx_dir && $ibx->{-unconfigured}) { + if (@eidx && $ibx->{-unconfigured}) { push @eidx_unconfigured, " $ibx->{inboxdir}\n"; } } @@ -128,18 +129,22 @@ publicInbox.$ibx->{name}.indexSequentialShard not boolean EOL $ibx_opt = { %$opt, sequential_shard => $v }; } - PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); + my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); last if $ibx_opt->{quit}; if (my $copt = $opt->{compact_opt}) { local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard}; PublicInbox::Xapcmd::run($ibx, 'compact', $copt); } - next if $ibx->{-unconfigured}; last if $ibx_opt->{quit}; - for my $dir (@eidx_dir) { - my $eidx = PublicInbox::ExtSearchIdx->new($dir); + next if $ibx->{-unconfigured} || !$nidx; + for my $eidx (@eidx) { $eidx->attach_inbox($ibx); - $eidx->eidx_sync($ibx_opt); - last if $ibx_opt->{quit}; } } +$opt->{-no_fsync} = 1 if !$opt->{fsync}; +my $pr = $opt->{-progress}; +for my $eidx (@eidx) { + $pr->("indexing $eidx->{topdir} ...\n") if $pr; + $eidx->eidx_sync($opt); + last if $opt->{quit}; +} -- cgit v1.2.3-24-ge0c7 From 66518051763825d491d0c1df6837d4266edc180a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Dec 2020 10:21:13 +0000 Subject: index: fix --no-fsync flag propagation to extindex Negation in flag names is confusing, but trying to deviate from the DB_NO_SYNC name used by Xapian is also confusing.
--- script/public-inbox-index | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'script') diff --git a/script/public-inbox-index b/script/public-inbox-index index a17bf615..c68f9224 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -85,7 +85,7 @@ for my $ei_name (@$update_extindex) { die "extindex `$ei_name' not configured or found\n"; } $eidx_seen{$topdir} //= - push(@eidx, PublicInbox::ExtSearchIdx->new($topdir)); + push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $opt)); } my $mods = {}; my @eidx_unconfigured; @@ -141,7 +141,6 @@ EOL $eidx->attach_inbox($ibx); } } -$opt->{-no_fsync} = 1 if !$opt->{fsync}; my $pr = $opt->{-progress}; for my $eidx (@eidx) { $pr->("indexing $eidx->{topdir} ...\n") if $pr; -- cgit v1.2.3-24-ge0c7 From 451ffd3068017ac1ca8bb0b454a65a7f2a3bf407 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Dec 2020 10:21:15 +0000 Subject: index: filter out indexlevel=basic from extindex extindex users will likely want to use indexlevel=basic for per-inbox indices, however extindex itself doesn't support basic index level (yet?). Let's ensure we don't trip up extindex users who specify "-L basic" on the -index command-line. 
--- script/public-inbox-index | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'script') diff --git a/script/public-inbox-index b/script/public-inbox-index index c68f9224..0fdfddc0 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -84,8 +84,10 @@ for my $ei_name (@$update_extindex) { } else { die "extindex `$ei_name' not configured or found\n"; } + my $o = { %$opt }; + delete $o->{indexlevel} if ($o->{indexlevel}//'') eq 'basic'; $eidx_seen{$topdir} //= - push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $opt)); + push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $o)); } my $mods = {}; my @eidx_unconfigured; -- cgit v1.2.3-24-ge0c7 From 1d96509a3f59c38394d2f3ac4323dc54c74dc202 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 01:44:37 +0000 Subject: extindex: --watch for inotify-based updates This reuses existing InboxIdle infrastructure to update external indices based on per-inbox updates. This is an alternative to auto-updating external indices via the -index command and also works with existing uses of -mda and public-inbox-watch. Using inotify (or EVFILT_VNODE) allows watching thousands of inboxes without having to scan every single one at every invocation. This is especially beneficial in cases where an external index is not writable to the users writing to per-inbox indices. 
--- script/public-inbox-extindex | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'script') diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index 17ad59fa..607baa3e 100644 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -11,6 +11,7 @@ usage: public-inbox-extindex [options] EXTINDEX_DIR [INBOX_DIR] Create and update external (detached) search indices --no-fsync speed up indexing, risk corruption on power outage + --watch run persistently and watch for inbox updates -L LEVEL `medium', or `full' (default: full) --all index all configured inboxes --jobs=NUM set or disable parallelization (NUM=0) @@ -27,7 +28,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i fsync|sync! indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s - gc + gc commit-interval=i watch all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; @@ -41,7 +42,8 @@ my $cfg = PublicInbox::Config->new; my @ibxs; if ($opt->{gc}) { die "E: inbox paths must not be specified with --gc\n" if @ARGV; - die "E: --all not compatible --gc\n" if $opt->{all}; + die "E: --all not compatible with --gc\n" if $opt->{all}; + die "E: --watch is not compatible with --gc\n" if $opt->{watch}; } else { @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); } @@ -56,6 +58,15 @@ if ($opt->{gc}) { $eidx->attach_config($cfg); $eidx->eidx_gc($opt); } else { - $eidx->attach_inbox($_) for @ibxs; - $eidx->eidx_sync($opt); + if ($opt->{all}) { + $eidx->attach_config($cfg); + } else { + $eidx->attach_inbox($_) for @ibxs; + } + if ($opt->{watch}) { + $cfg = undef; # save memory only after SIGHUP + $eidx->eidx_watch($opt); + } else { + $eidx->eidx_sync($opt); + } } -- cgit v1.2.3-24-ge0c7 From 41464d205ade16a5a847061fa2eb706a33b52a88 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 10:16:22 +0000 Subject: extindex: enable autoflush on STDOUT/STDERR With --watch, the output may 
be redirected to a pipe or socket which Perl may decide to buffer. Ensure Perl doesn't buffer these outputs since they can provide real-time status updates in response to signals or FS activity. --- script/public-inbox-extindex | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'script') diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index 607baa3e..17986f60 100644 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -33,7 +33,9 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; - +require IO::Handle; +STDOUT->autoflush(1); +STDERR->autoflush(1); # require lazily to speed up --help my $eidx_dir = shift(@ARGV) // die "E: $help"; local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync -- cgit v1.2.3-24-ge0c7 From 02aad3e340d1711359c4def6e91482140a989ce1 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 10:16:23 +0000 Subject: extindex: add undocumented --no-scan switch This makes diagnosing --watch problems easier when there's 50K inboxes by avoiding the lengthy scan (which is the reason --watch exists in the first place). --- script/public-inbox-extindex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'script') diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index 17986f60..f4ffda4b 100644 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -23,12 +23,12 @@ usage: public-inbox-extindex [options] EXTINDEX_DIR [INBOX_DIR] BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) See public-inbox-extindex(1) man page for full documentation. EOF -my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 }; +my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i fsync|sync! 
indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s - gc commit-interval=i watch + gc commit-interval=i watch scan! all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; -- cgit v1.2.3-24-ge0c7 From e411f4465dd26d8b09d005224a8ead7056e6e532 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 10:16:24 +0000 Subject: extindex: allow using --all without EXTINDEX_DIR If "--all" is specified to index all inboxes, implicitly choose the configured [extindex "all"] external index since "--all" is incompatible with specifying inbox directories on the command-line. --- script/public-inbox-extindex | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'script') diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index f4ffda4b..5f27988f 100644 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -6,7 +6,7 @@ use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = <= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; require IO::Handle; STDOUT->autoflush(1); STDERR->autoflush(1); -# require lazily to speed up --help -my $eidx_dir = shift(@ARGV) // die "E: $help"; local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync +# require lazily to speed up --help require PublicInbox::Admin; my $cfg = PublicInbox::Config->new; +my $eidx_dir = shift(@ARGV); +unless (defined $eidx_dir) { + if ($opt->{all} && $cfg->ALL) { + $eidx_dir = $cfg->ALL->{topdir}; + } else { + die "E: $help"; + } +} my @ibxs; if ($opt->{gc}) { die "E: inbox paths must not be specified with --gc\n" if @ARGV; -- cgit v1.2.3-24-ge0c7 From 522bb4a4973d4ac41b83be58dd3257e8cd038744 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Dec 2020 02:53:05 +0000 Subject: check defined return value for localized slurp errors Reading from regular files (even on STDIN) can fail when dealing with flakey storage. 
--- script/public-inbox-edit | 3 ++- script/public-inbox-init | 6 +----- script/public-inbox-learn | 3 +-- script/public-inbox-purge | 2 +- 4 files changed, 5 insertions(+), 9 deletions(-) (limited to 'script') diff --git a/script/public-inbox-edit b/script/public-inbox-edit index a70614fc..81f023bc 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -183,7 +183,8 @@ retry_edit: # rename/relink $edit_fn open my $new_fh, '<', $edit_fn or die "can't read edited file ($edit_fn): $!\n"; - my $new_raw = do { local $/; <$new_fh> }; + defined(my $new_raw = do { local $/; <$new_fh> }) or die + "read $edit_fn: $!\n"; if (!$opt->{raw}) { # get rid of the From we added diff --git a/script/public-inbox-init b/script/public-inbox-init index 6d538e43..7ac77830 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -100,11 +100,7 @@ if (-e $pi_config) { defined $perm or die "(f)stat failed on $pi_config: $!\n"; chmod($perm & 07777, $fh) or die "(f)chmod failed on future $pi_config: $!\n"; - my $old; - { - local $/; - $old = <$oh>; - } + defined(my $old = do { local $/; <$oh> }) or die "read $pi_config: $!\n"; print $fh $old or die "failed to write: $!\n"; close $oh or die "failed to close $pi_config: $!\n"; diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 9352c8ff..1731a4ba 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -39,8 +39,7 @@ my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_cfg = PublicInbox::Config->new; my $err; my $mime = PublicInbox::Eml->new(do{ - local $/; - my $data = <STDIN>; + defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n"; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; if ($train ne 'rm') { diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 7bca11ea..52f1f18a 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -32,7 +32,7 @@ if ($opt->{help}) { print $help; exit 0 }; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV,
$opt); PublicInbox::AdminEdit::check_editable(\@ibxs); -my $data = do { local $/; <STDIN> }; +defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n"; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $n_purged = 0; -- cgit v1.2.3-24-ge0c7