From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 2CDAC1F428 for ; Thu, 16 Mar 2023 20:01:32 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1678996892; bh=d+cZEkojnHBwuUMqsvpWdTbBud7kaJ+5QLaNopFhrgw=; h=From:To:Subject:Date:From; b=St+fwdyS9b9fT0yqYGN+CzaYIiaLROb/zO9dhO3GaZmjr8RQ/lu1NMn0hXHKH2ldV sdArwbRyRdyv5f1eTu4NxMadn3qTlFf8s2qeKoWp2ZQo9IDftbbHhQ+aoPVbOTpTq5 3IlFPP4oUIzSvr9DuCVc9RdvFfKQ4EStC2Cg+7m8= From: Eric Wong To: spew@80x24.org Subject: [PATCH 01/10] ipc: move nproc_shards from v2writable Date: Thu, 16 Mar 2023 20:01:22 +0000 Message-Id: <20230316200131.2113244-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We'll be using nproc_shards for indexing non-Inbox stuff. --- lib/PublicInbox/IPC.pm | 26 +++++++++++++++++++++++++- lib/PublicInbox/V2Writable.pm | 26 +------------------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm index 548a72eb..730f2cf6 100644 --- a/lib/PublicInbox/IPC.pm +++ b/lib/PublicInbox/IPC.pm @@ -19,7 +19,7 @@ use PublicInbox::WQWorker; use Socket qw(AF_UNIX MSG_EOR SOCK_STREAM); my $MY_MAX_ARG_STRLEN = 4096 * 33; # extra 4K for serialization my $SEQPACKET = eval { Socket::SOCK_SEQPACKET() }; # portable enough? -our @EXPORT_OK = qw(ipc_freeze ipc_thaw); +our @EXPORT_OK = qw(ipc_freeze ipc_thaw nproc_shards); my ($enc, $dec); # ->imports at BEGIN turns sereal_*_with_object into custom ops on 5.14+ # and eliminate method call overhead @@ -454,4 +454,28 @@ sub detect_nproc () { undef } +# SATA storage lags behind what CPUs are capable of, so relying on +# nproc(1) can be misleading and having extra Xapian shards is a +# waste of FDs and space. It can also lead to excessive IO latency +# and slow things down. Users on NVME or other fast storage can +# use the NPROC env or switches in our script/public-inbox-* programs +# to increase Xapian shards +our $NPROC_MAX_DEFAULT = 4; + +sub nproc_shards ($) { + my ($creat_opt) = @_; + my $n = $creat_opt->{nproc} if ref($creat_opt) eq 'HASH'; + $n //= $ENV{NPROC}; + if (!$n) { + # assume 2 cores if not detectable or zero + state $NPROC_DETECTED = PublicInbox::IPC::detect_nproc() || 2; + $n = $NPROC_DETECTED; + $n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT; + } + + # subtract for the main process and git-fast-import + $n -= 1; + $n < 1 ? 1 : $n; +} + 1; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index ed5182ae..d3d13941 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -8,7 +8,7 @@ use strict; use v5.10.1; use parent qw(PublicInbox::Lock PublicInbox::IPC); use PublicInbox::SearchIdxShard; -use PublicInbox::IPC; +use PublicInbox::IPC qw(nproc_shards); use PublicInbox::Eml; use PublicInbox::Git; use PublicInbox::Import; @@ -29,30 +29,6 @@ my $OID = qr/[a-f0-9]{40,}/; # an estimate of the post-packed size to the raw uncompressed size our $PACKING_FACTOR = 0.4; -# SATA storage lags behind what CPUs are capable of, so relying on -# nproc(1) can be misleading and having extra Xapian shards is a -# waste of FDs and space. It can also lead to excessive IO latency -# and slow things down. Users on NVME or other fast storage can -# use the NPROC env or switches in our script/public-inbox-* programs -# to increase Xapian shards -our $NPROC_MAX_DEFAULT = 4; - -sub nproc_shards ($) { - my ($creat_opt) = @_; - my $n = $creat_opt->{nproc} if ref($creat_opt) eq 'HASH'; - $n //= $ENV{NPROC}; - if (!$n) { - # assume 2 cores if not detectable or zero - state $NPROC_DETECTED = PublicInbox::IPC::detect_nproc() || 2; - $n = $NPROC_DETECTED; - $n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT; - } - - # subtract for the main process and git-fast-import - $n -= 1; - $n < 1 ? 1 : $n; -} - sub count_shards ($) { my ($self) = @_; # always load existing shards in case core count changes: