BPF Archive mirror
* [PATCH bpf-next 0/6] bench: fast in-kernel triggering benchmarks
@ 2024-03-22  0:00 Andrii Nakryiko
  2024-03-22  0:00 ` [PATCH bpf-next 1/6] selftests/bpf: rename and clean up userspace-triggered benchmarks Andrii Nakryiko
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-22  0:00 UTC (permalink / raw)
  To: bpf, ast, daniel, martin.lau; +Cc: andrii, kernel-team

Remove "legacy" triggering benchmarks which rely on syscalls (and thus syscall
overhead is a noticeable part of benchmark, unfortunately). Replace them with
faster versions that rely on triggering BPF programs in-kernel through another
simple "driver" BPF program. See patch #2 with comparison results.

The raw_tp/tp/fmodret benchmarks required adding a simple kfunc to the
kernel to be able to trigger a simple tracepoint from a BPF program (we
also mark that function as allowing error injection, so fmod_ret
programs can attach to it). This limits the raw_tp/tp/fmodret
benchmarks to new kernels only, but it keeps the bench tool itself very
portable; most other benchmarks will still work on a wide variety of
kernels without the need to worry about building and deploying a custom
kernel module. See patches #5 and #6 for details.
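
The gist of the batched approach, condensed from patch #2 (see there
for the full code): a raw_tp "driver" BPF program spins in a short
parameterized loop around a cheap helper call, and user space re-runs
that driver via BPF_PROG_TEST_RUN, so a single bpf() syscall is
amortized across batch_iters runs of the benchmarked program:

  /* BPF side: bpf_get_numa_node_id() is the attach point */
  SEC("raw_tp")
  int trigger_driver(void *ctx)
  {
          int i;

          for (i = 0; i < batch_iters; i++)
                  (void)bpf_get_numa_node_id();
          return 0;
  }

  /* user space: one syscall per batch_iters program invocations */
  while (true)
          bpf_prog_test_run_opts(driver_prog_fd, NULL);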

Andrii Nakryiko (6):
  selftests/bpf: rename and clean up userspace-triggered benchmarks
  selftests/bpf: add batched, mostly in-kernel BPF triggering benchmarks
  selftests/bpf: remove syscall-driven benchs, keep syscall-count only
  selftests/bpf: lazy-load trigger bench BPF programs
  bpf: add bpf_test_tp() kfunc triggering tp and allowing error
    injection
  selftests/bpf: add batched tp/raw_tp/fmodret tests

 kernel/bpf/bpf_test.h                         |  34 ++
 kernel/bpf/helpers.c                          |  13 +
 tools/testing/selftests/bpf/bench.c           |  33 +-
 .../selftests/bpf/benchs/bench_trigger.c      | 388 +++++++++---------
 .../selftests/bpf/benchs/run_bench_trigger.sh |  22 +-
 .../selftests/bpf/benchs/run_bench_uprobes.sh |   2 +-
 .../selftests/bpf/progs/trigger_bench.c       |  68 ++-
 7 files changed, 332 insertions(+), 228 deletions(-)
 create mode 100644 kernel/bpf/bpf_test.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 1/6] selftests/bpf: rename and clean up userspace-triggered benchmarks
  2024-03-22  0:00 [PATCH bpf-next 0/6] bench: fast in-kernel triggering benchmarks Andrii Nakryiko
@ 2024-03-22  0:00 ` Andrii Nakryiko
  2024-03-22  0:00 ` [PATCH bpf-next 2/6] selftests/bpf: add batched, mostly in-kernel BPF triggering benchmarks Andrii Nakryiko
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-22  0:00 UTC (permalink / raw)
  To: bpf, ast, daniel, martin.lau; +Cc: andrii, kernel-team

Rename uprobe-base to the more precise usermode-count (it will match
the other baseline-like benchmarks, kernel-count and syscall-count).
Also use the BENCH_TRIG_USERMODE() macro to define all usermode-based
triggering benchmarks, which include usermode-count and the
uprobe/uretprobe benchmarks.
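
For instance, BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop") from
the diff below expands to:

  const struct bench bench_trig_uprobe_nop = {
          .name = "trig-uprobe-nop",
          .validate = trigger_validate,
          .setup = uprobe_nop_setup,
          .producer_thread = uprobe_producer_nop,
          .measure = trigger_measure,
          .report_progress = hits_drops_report_progress,
          .report_final = hits_drops_report_final,
  };

so a benchmark's KIND argument has to match both its xxx_setup()
function and its bench_trig_xxx variable name.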

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/testing/selftests/bpf/bench.c           |  14 ++-
 .../selftests/bpf/benchs/bench_trigger.c      | 106 ++++++------------
 .../selftests/bpf/benchs/run_bench_uprobes.sh |   2 +-
 3 files changed, 49 insertions(+), 73 deletions(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index b2b4c391eb0a..7ca1e1eb5c30 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -491,7 +491,12 @@ extern const struct bench bench_rename_kretprobe;
 extern const struct bench bench_rename_rawtp;
 extern const struct bench bench_rename_fentry;
 extern const struct bench bench_rename_fexit;
+
+/* pure counting benchmarks to establish theoretical limits */
+extern const struct bench bench_trig_usermode_count;
 extern const struct bench bench_trig_base;
+
+/* kernel-side syscall-triggered benchmarks */
 extern const struct bench bench_trig_tp;
 extern const struct bench bench_trig_rawtp;
 extern const struct bench bench_trig_kprobe;
@@ -502,13 +507,15 @@ extern const struct bench bench_trig_fentry;
 extern const struct bench bench_trig_fexit;
 extern const struct bench bench_trig_fentry_sleep;
 extern const struct bench bench_trig_fmodret;
-extern const struct bench bench_trig_uprobe_base;
+
+/* uprobe/uretprobe benchmarks */
 extern const struct bench bench_trig_uprobe_nop;
 extern const struct bench bench_trig_uretprobe_nop;
 extern const struct bench bench_trig_uprobe_push;
 extern const struct bench bench_trig_uretprobe_push;
 extern const struct bench bench_trig_uprobe_ret;
 extern const struct bench bench_trig_uretprobe_ret;
+
 extern const struct bench bench_rb_libbpf;
 extern const struct bench bench_rb_custom;
 extern const struct bench bench_pb_libbpf;
@@ -539,7 +546,10 @@ static const struct bench *benchs[] = {
 	&bench_rename_rawtp,
 	&bench_rename_fentry,
 	&bench_rename_fexit,
+	/* pure counting benchmarks for establishing theoretical limits */
+	&bench_trig_usermode_count,
 	&bench_trig_base,
+	/* syscall-driven triggering benchmarks */
 	&bench_trig_tp,
 	&bench_trig_rawtp,
 	&bench_trig_kprobe,
@@ -550,7 +560,7 @@ static const struct bench *benchs[] = {
 	&bench_trig_fexit,
 	&bench_trig_fentry_sleep,
 	&bench_trig_fmodret,
-	&bench_trig_uprobe_base,
+	/* uprobes */
 	&bench_trig_uprobe_nop,
 	&bench_trig_uretprobe_nop,
 	&bench_trig_uprobe_push,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index b7aea79495ba..68e703b46d9b 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -12,6 +12,7 @@
 /* BPF triggering benchmarks */
 static struct trigger_ctx {
 	struct trigger_bench *skel;
+	bool usermode_counters;
 } ctx;
 
 static struct counter base_hits[MAX_BUCKETS];
@@ -72,7 +73,10 @@ static void *trigger_producer(void *input)
 
 static void trigger_measure(struct bench_res *res)
 {
-	res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
+	if (ctx.usermode_counters)
+		res->hits = sum_and_reset_counters(base_hits);
+	else
+		res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
 }
 
 static void setup_ctx(void)
@@ -190,7 +194,7 @@ __weak void uprobe_target_ret(void)
 	asm volatile ("");
 }
 
-static void *uprobe_base_producer(void *input)
+static void *uprobe_producer_count(void *input)
 {
 	while (true) {
 		uprobe_target_nop();
@@ -246,32 +250,37 @@ static void usetup(bool use_retprobe, void *target_addr)
 	ctx.skel->links.bench_trigger_uprobe = link;
 }
 
-static void uprobe_setup_nop(void)
+static void usermode_count_setup(void)
+{
+	ctx.usermode_counters = true;
+}
+
+static void uprobe_nop_setup(void)
 {
 	usetup(false, &uprobe_target_nop);
 }
 
-static void uretprobe_setup_nop(void)
+static void uretprobe_nop_setup(void)
 {
 	usetup(true, &uprobe_target_nop);
 }
 
-static void uprobe_setup_push(void)
+static void uprobe_push_setup(void)
 {
 	usetup(false, &uprobe_target_push);
 }
 
-static void uretprobe_setup_push(void)
+static void uretprobe_push_setup(void)
 {
 	usetup(true, &uprobe_target_push);
 }
 
-static void uprobe_setup_ret(void)
+static void uprobe_ret_setup(void)
 {
 	usetup(false, &uprobe_target_ret);
 }
 
-static void uretprobe_setup_ret(void)
+static void uretprobe_ret_setup(void)
 {
 	usetup(true, &uprobe_target_ret);
 }
@@ -385,65 +394,22 @@ const struct bench bench_trig_fmodret = {
 	.report_final = hits_drops_report_final,
 };
 
-const struct bench bench_trig_uprobe_base = {
-	.name = "trig-uprobe-base",
-	.setup = NULL, /* no uprobe/uretprobe is attached */
-	.producer_thread = uprobe_base_producer,
-	.measure = trigger_base_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_nop = {
-	.name = "trig-uprobe-nop",
-	.setup = uprobe_setup_nop,
-	.producer_thread = uprobe_producer_nop,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uretprobe_nop = {
-	.name = "trig-uretprobe-nop",
-	.setup = uretprobe_setup_nop,
-	.producer_thread = uprobe_producer_nop,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_push = {
-	.name = "trig-uprobe-push",
-	.setup = uprobe_setup_push,
-	.producer_thread = uprobe_producer_push,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uretprobe_push = {
-	.name = "trig-uretprobe-push",
-	.setup = uretprobe_setup_push,
-	.producer_thread = uprobe_producer_push,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_ret = {
-	.name = "trig-uprobe-ret",
-	.setup = uprobe_setup_ret,
-	.producer_thread = uprobe_producer_ret,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uretprobe_ret = {
-	.name = "trig-uretprobe-ret",
-	.setup = uretprobe_setup_ret,
-	.producer_thread = uprobe_producer_ret,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
+/* uprobe benchmarks */
+#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME)			\
+const struct bench bench_trig_##KIND = {				\
+	.name = "trig-" NAME,						\
+	.validate = trigger_validate,					\
+	.setup = KIND##_setup,						\
+	.producer_thread = uprobe_producer_##PRODUCER,			\
+	.measure = trigger_measure,					\
+	.report_progress = hits_drops_report_progress,			\
+	.report_final = hits_drops_report_final,			\
+}
+
+BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
+BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
+BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
+BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
+BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
+BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
+BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index 9bdcc74e03a4..36021d243397 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
 
 set -eufo pipefail
 
-for i in base {uprobe,uretprobe}-{nop,push,ret}
+for i in usermode-count base {uprobe,uretprobe}-{nop,push,ret}
 do
 	summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
 	printf "%-15s: %s\n" $i "$summary"
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 2/6] selftests/bpf: add batched, mostly in-kernel BPF triggering benchmarks
  2024-03-22  0:00 [PATCH bpf-next 0/6] bench: fast in-kernel triggering benchmarks Andrii Nakryiko
  2024-03-22  0:00 ` [PATCH bpf-next 1/6] selftests/bpf: rename and clean up userspace-triggered benchmarks Andrii Nakryiko
@ 2024-03-22  0:00 ` Andrii Nakryiko
  2024-03-25 17:21   ` Alexei Starovoitov
  2024-03-22  0:00 ` [PATCH bpf-next 3/6] selftests/bpf: remove syscall-driven benchs, keep syscall-count only Andrii Nakryiko
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-22  0:00 UTC (permalink / raw)
  To: bpf, ast, daniel, martin.lau; +Cc: andrii, kernel-team, Jiri Olsa

Existing kprobe/fentry triggering benchmarks have a 1-to-1 mapping
between one syscall execution and one BPF program run. While we use the
fast getpgid() syscall, syscall overhead can still be non-trivial.

This patch adds a set of kprobe/fentry benchmarks that significantly
amortizes syscall cost relative to the actual BPF triggering overhead.
We do this by employing the BPF_PROG_TEST_RUN command to trigger a
"driver" raw_tp program which runs a tight parameterized loop calling a
cheap BPF helper (bpf_get_numa_node_id()), to which the benchmarked
kprobe/fentry programs are attached.

This way one bpf() syscall causes N executions of the BPF program being
benchmarked. N defaults to 100, but can be adjusted with the
--trig-batch-iters CLI argument.
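
E.g., a hypothetical invocation (reusing the -w/-d/-a flags the run
scripts already pass to bench):

$ ./bench -w2 -d5 -a --trig-batch-iters 500 trig-fentry-batch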

For comparison, we also implement a new baseline program that, instead
of triggering another BPF program, just does N atomic per-CPU counter
increments, establishing the throughput limit for all other program
types within this batched benchmarking setup.

Taking the final set of benchmarks added in this patch set (including
tp/raw_tp/fmodret, added in a later patch), and keeping the "legacy"
syscall-driven benchmarks for now, we can capture all triggering
benchmarks in one place for comparison, before we remove the legacy
ones (and rename xxx-batch into just xxx).

$ benchs/run_bench_trigger.sh
usermode-count       :   79.500 ± 0.024M/s
kernel-count         :   49.949 ± 0.081M/s
syscall-count        :    9.009 ± 0.007M/s

fentry-batch         :   31.002 ± 0.015M/s
fexit-batch          :   20.372 ± 0.028M/s
fmodret-batch        :   21.651 ± 0.659M/s
rawtp-batch          :   36.775 ± 0.264M/s
tp-batch             :   19.411 ± 0.248M/s
kprobe-batch         :   12.949 ± 0.220M/s
kprobe-multi-batch   :   15.400 ± 0.007M/s
kretprobe-batch      :    5.559 ± 0.011M/s
kretprobe-multi-batch:    5.861 ± 0.003M/s

fentry-legacy        :    8.329 ± 0.004M/s
fexit-legacy         :    6.239 ± 0.003M/s
fmodret-legacy       :    6.595 ± 0.001M/s
rawtp-legacy         :    8.305 ± 0.004M/s
tp-legacy            :    6.382 ± 0.001M/s
kprobe-legacy        :    5.528 ± 0.003M/s
kprobe-multi-legacy  :    5.864 ± 0.022M/s
kretprobe-legacy     :    3.081 ± 0.001M/s
kretprobe-multi-legacy:   3.193 ± 0.001M/s

Note how the xxx-batch variants show significantly higher measured
throughput, even though the in-kernel overhead per program run is
exactly the same. As such, results should be compared only between
benchmarks of the same kind (syscall-driven vs batched):

fentry-legacy        :    8.329 ± 0.004M/s
fentry-batch         :   31.002 ± 0.015M/s

kprobe-multi-legacy  :    5.864 ± 0.022M/s
kprobe-multi-batch   :   15.400 ± 0.007M/s

Note also that syscall-count sets a theoretical limit for
syscall-triggered benchmarks, while kernel-count sets a similar limit
for the batched variants. usermode-count is a happy and unachievable
case of user space counting without doing any syscalls, and is mostly
a measure of CPU speed for such a trivial benchmark.
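
As a rough back-of-the-envelope check, inverting the throughputs above
gives per-invocation costs:

fentry-batch         : 1 / 31.002 M/s ~  32 ns
fentry-legacy        : 1 /  8.329 M/s ~ 120 ns
syscall-count        : 1 /  9.009 M/s ~ 111 ns
kernel-count         : 1 / 49.949 M/s ~  20 ns

I.e., a legacy run costs roughly one syscall (~111 ns) plus the actual
fentry overhead (~12 ns on top of the ~20 ns counting baseline), and
it's that syscall part that the batched variants amortize away.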

As mentioned, tp/raw_tp/fmodret require a kernel-side kfunc to produce
a similar benchmark, which we address in a separate patch.

Note that run_bench_trigger.sh allows overriding the list of benchmarks
to run, which is very useful for performance work; see the example
below.
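
For example, to re-run just a couple of benchmarks, with four producer
threads (PROD_CNT is the script's producer-count knob):

$ benchs/run_bench_trigger.sh kprobe-batch kretprobe-batch
$ PROD_CNT=4 benchs/run_bench_trigger.sh fentry-batch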

Cc: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/testing/selftests/bpf/bench.c           |  21 ++-
 .../selftests/bpf/benchs/bench_trigger.c      | 130 +++++++++++++++++-
 .../selftests/bpf/benchs/run_bench_trigger.sh |  24 +++-
 .../selftests/bpf/progs/trigger_bench.c       |  67 ++++++++-
 4 files changed, 235 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 7ca1e1eb5c30..484bcbeaa819 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -280,6 +280,7 @@ extern struct argp bench_strncmp_argp;
 extern struct argp bench_hashmap_lookup_argp;
 extern struct argp bench_local_storage_create_argp;
 extern struct argp bench_htab_mem_argp;
+extern struct argp bench_trigger_batch_argp;
 
 static const struct argp_child bench_parsers[] = {
 	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -292,6 +293,7 @@ static const struct argp_child bench_parsers[] = {
 	{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
 	{ &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
 	{ &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
+	{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
 	{},
 };
 
@@ -508,6 +510,15 @@ extern const struct bench bench_trig_fexit;
 extern const struct bench bench_trig_fentry_sleep;
 extern const struct bench bench_trig_fmodret;
 
+/* batched, staying mostly in-kernel benchmarks */
+extern const struct bench bench_trig_kernel_count;
+extern const struct bench bench_trig_kprobe_batch;
+extern const struct bench bench_trig_kretprobe_batch;
+extern const struct bench bench_trig_kprobe_multi_batch;
+extern const struct bench bench_trig_kretprobe_multi_batch;
+extern const struct bench bench_trig_fentry_batch;
+extern const struct bench bench_trig_fexit_batch;
+
 /* uprobe/uretprobe benchmarks */
 extern const struct bench bench_trig_uprobe_nop;
 extern const struct bench bench_trig_uretprobe_nop;
@@ -548,7 +559,7 @@ static const struct bench *benchs[] = {
 	&bench_rename_fexit,
 	/* pure counting benchmarks for establishing theoretical limits */
 	&bench_trig_usermode_count,
-	&bench_trig_base,
+	&bench_trig_kernel_count,
 	/* syscall-driven triggering benchmarks */
 	&bench_trig_tp,
 	&bench_trig_rawtp,
@@ -560,6 +571,13 @@ static const struct bench *benchs[] = {
 	&bench_trig_fexit,
 	&bench_trig_fentry_sleep,
 	&bench_trig_fmodret,
+	/* batched, staying mostly in-kernel triggers */
+	&bench_trig_kprobe_batch,
+	&bench_trig_kretprobe_batch,
+	&bench_trig_kprobe_multi_batch,
+	&bench_trig_kretprobe_multi_batch,
+	&bench_trig_fentry_batch,
+	&bench_trig_fexit_batch,
 	/* uprobes */
 	&bench_trig_uprobe_nop,
 	&bench_trig_uretprobe_nop,
@@ -567,6 +585,7 @@ static const struct bench *benchs[] = {
 	&bench_trig_uretprobe_push,
 	&bench_trig_uprobe_ret,
 	&bench_trig_uretprobe_ret,
+	/* ringbuf/perfbuf benchmarks */
 	&bench_rb_libbpf,
 	&bench_rb_custom,
 	&bench_pb_libbpf,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 68e703b46d9b..21fa1a84ecd2 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -1,11 +1,54 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 #define _GNU_SOURCE
+#include <argp.h>
 #include <unistd.h>
+#include <stdint.h>
 #include "bench.h"
 #include "trigger_bench.skel.h"
 #include "trace_helpers.h"
 
+static struct {
+	__u32 batch_iters;
+} args = {
+	.batch_iters = 100,
+};
+
+enum {
+	ARG_TRIG_BATCH_ITERS = 7000,
+};
+
+static const struct argp_option opts[] = {
+	{ "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
+		"Number of in-kernel iterations per one driver test run"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	long ret;
+
+	switch (key) {
+	case ARG_TRIG_BATCH_ITERS:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid --trig-batch-iters value\n");
+			argp_usage(state);
+		}
+		args.batch_iters = ret;
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+const struct argp bench_trigger_batch_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
 /* adjust slot shift in inc_hits() if changing */
 #define MAX_BUCKETS 256
 
@@ -13,6 +56,7 @@
 static struct trigger_ctx {
 	struct trigger_bench *skel;
 	bool usermode_counters;
+	int driver_prog_fd;
 } ctx;
 
 static struct counter base_hits[MAX_BUCKETS];
@@ -71,6 +115,16 @@ static void *trigger_producer(void *input)
 	return NULL;
 }
 
+static void *trigger_producer_batch(void *input)
+{
+	int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);
+
+	while (true)
+		bpf_prog_test_run_opts(fd, NULL);
+
+	return NULL;
+}
+
 static void trigger_measure(struct bench_res *res)
 {
 	if (ctx.usermode_counters)
@@ -81,13 +135,23 @@ static void trigger_measure(struct bench_res *res)
 
 static void setup_ctx(void)
 {
+	int err;
+
 	setup_libbpf();
 
-	ctx.skel = trigger_bench__open_and_load();
+	ctx.skel = trigger_bench__open();
 	if (!ctx.skel) {
 		fprintf(stderr, "failed to open skeleton\n");
 		exit(1);
 	}
+
+	ctx.skel->rodata->batch_iters = args.batch_iters;
+
+	err = trigger_bench__load(ctx.skel);
+	if (err) {
+		fprintf(stderr, "failed to load skeleton\n");
+		exit(1);
+	}
 }
 
 static void attach_bpf(struct bpf_program *prog)
@@ -161,6 +225,50 @@ static void trigger_fmodret_setup(void)
 	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
 }
 
+/* Batched, staying mostly in-kernel triggering setups */
+static void trigger_kernel_count_setup(void)
+{
+	setup_ctx();
+	/* override driver program */
+	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
+}
+
+static void trigger_kprobe_batch_setup(void)
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_batch);
+}
+
+static void trigger_kretprobe_batch_setup(void)
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_batch);
+}
+
+static void trigger_kprobe_multi_batch_setup(void)
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi_batch);
+}
+
+static void trigger_kretprobe_multi_batch_setup(void)
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi_batch);
+}
+
+static void trigger_fentry_batch_setup(void)
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_fentry_batch);
+}
+
+static void trigger_fexit_batch_setup(void)
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_fexit_batch);
+}
+
 /* make sure call is not inlined and not avoided by compiler, so __weak and
  * inline asm volatile in the body of the function
  *
@@ -394,6 +502,26 @@ const struct bench bench_trig_fmodret = {
 	.report_final = hits_drops_report_final,
 };
 
+/* batched (staying mostly in kernel) kprobe/fentry benchmarks */
+#define BENCH_TRIG_BATCH(KIND, NAME)					\
+const struct bench bench_trig_##KIND = {				\
+	.name = "trig-" NAME,						\
+	.setup = trigger_##KIND##_setup,				\
+	.producer_thread = trigger_producer_batch,			\
+	.measure = trigger_measure,					\
+	.report_progress = hits_drops_report_progress,			\
+	.report_final = hits_drops_report_final,			\
+	.argp = &bench_trigger_batch_argp,				\
+}
+
+BENCH_TRIG_BATCH(kernel_count, "kernel-count");
+BENCH_TRIG_BATCH(kprobe_batch, "kprobe-batch");
+BENCH_TRIG_BATCH(kretprobe_batch, "kretprobe-batch");
+BENCH_TRIG_BATCH(kprobe_multi_batch, "kprobe-multi-batch");
+BENCH_TRIG_BATCH(kretprobe_multi_batch, "kretprobe-multi-batch");
+BENCH_TRIG_BATCH(fentry_batch, "fentry-batch");
+BENCH_TRIG_BATCH(fexit_batch, "fexit-batch");
+
 /* uprobe benchmarks */
 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME)			\
 const struct bench bench_trig_##KIND = {				\
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index 78e83f243294..b58ec33ea18c 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -2,8 +2,24 @@
 
 set -eufo pipefail
 
-for i in base tp rawtp kprobe fentry fmodret
-do
-	summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
-	printf "%-10s: %s\n" $i "$summary"
+def_tests=( \
+	usermode-count kernel-count syscall-count \
+	fentry-batch fexit-batch \
+	kprobe-batch kprobe-multi-batch \
+	kretprobe-batch kretprobe-multi-batch \
+	fentry fexit fmodret \
+	rawtp tp \
+	kprobe kprobe-multi kretprobe kretprobe-multi \
+)
+
+tests=("$@")
+if [ ${#tests[@]} -eq 0 ]; then
+	tests=("${def_tests[@]}")
+fi
+
+p=${PROD_CNT:-1}
+
+for t in "${tests[@]}"; do
+	summary=$(sudo ./bench -w2 -d5 -a -p$p trig-$t | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+	printf "%-21s: %s\n" $t "$summary"
 done
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 42ec202015ed..f0b76afa5017 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2020 Facebook
-
 #include <linux/bpf.h>
 #include <asm/unistd.h>
 #include <bpf/bpf_helpers.h>
@@ -103,3 +102,69 @@ int bench_trigger_uprobe(void *ctx)
 	inc_counter();
 	return 0;
 }
+
+const volatile int batch_iters = 0;
+
+SEC("raw_tp")
+int trigger_count(void *ctx)
+{
+	int i;
+
+	for (i = 0; i < batch_iters; i++)
+		inc_counter();
+
+	return 0;
+}
+
+SEC("raw_tp")
+int trigger_driver(void *ctx)
+{
+	int i;
+
+	for (i = 0; i < batch_iters; i++)
+		(void)bpf_get_numa_node_id(); /* attach point for benchmarking */
+
+	return 0;
+}
+
+SEC("kprobe/bpf_get_numa_node_id")
+int bench_trigger_kprobe_batch(void *ctx)
+{
+	inc_counter();
+	return 0;
+}
+
+SEC("kretprobe/bpf_get_numa_node_id")
+int bench_trigger_kretprobe_batch(void *ctx)
+{
+	inc_counter();
+	return 0;
+}
+
+SEC("kprobe.multi/bpf_get_numa_node_id")
+int bench_trigger_kprobe_multi_batch(void *ctx)
+{
+	inc_counter();
+	return 0;
+}
+
+SEC("kretprobe.multi/bpf_get_numa_node_id")
+int bench_trigger_kretprobe_multi_batch(void *ctx)
+{
+	inc_counter();
+	return 0;
+}
+
+SEC("fentry/bpf_get_numa_node_id")
+int bench_trigger_fentry_batch(void *ctx)
+{
+	inc_counter();
+	return 0;
+}
+
+SEC("fexit/bpf_get_numa_node_id")
+int bench_trigger_fexit_batch(void *ctx)
+{
+	inc_counter();
+	return 0;
+}
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 3/6] selftests/bpf: remove syscall-driven benchs, keep syscall-count only
  2024-03-22  0:00 [PATCH bpf-next 0/6] bench: fast in-kernel triggering benchmarks Andrii Nakryiko
  2024-03-22  0:00 ` [PATCH bpf-next 1/6] selftests/bpf: rename and clean up userspace-triggered benchmarks Andrii Nakryiko
  2024-03-22  0:00 ` [PATCH bpf-next 2/6] selftests/bpf: add batched, mostly in-kernel BPF triggering benchmarks Andrii Nakryiko
@ 2024-03-22  0:00 ` Andrii Nakryiko
  2024-03-22  0:00 ` [PATCH bpf-next 4/6] selftests/bpf: lazy-load trigger bench BPF programs Andrii Nakryiko
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-22  0:00 UTC (permalink / raw)
  To: bpf, ast, daniel, martin.lau; +Cc: andrii, kernel-team

Remove "legacy" benchmarks triggered by syscalls in favor of newly added
in-kernel/batched benchmarks. Drop -batched suffix now as well.
Next patch will restore "feature parity" by adding back
tp/raw_tp/fmodret benchmarks based on in-kernel kfunc approach.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/testing/selftests/bpf/bench.c           |  32 +--
 .../selftests/bpf/benchs/bench_trigger.c      | 213 +++---------------
 .../selftests/bpf/benchs/run_bench_trigger.sh |  11 +-
 .../selftests/bpf/benchs/run_bench_uprobes.sh |   2 +-
 .../selftests/bpf/progs/trigger_bench.c       |  83 +------
 5 files changed, 42 insertions(+), 299 deletions(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 484bcbeaa819..8b16841a3dec 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -496,28 +496,16 @@ extern const struct bench bench_rename_fexit;
 
 /* pure counting benchmarks to establish theoretical limits */
 extern const struct bench bench_trig_usermode_count;
-extern const struct bench bench_trig_base;
+extern const struct bench bench_trig_syscall_count;
+extern const struct bench bench_trig_kernel_count;
 
-/* kernel-side syscall-triggered benchmarks */
-extern const struct bench bench_trig_tp;
-extern const struct bench bench_trig_rawtp;
+/* batched, staying mostly in-kernel benchmarks */
 extern const struct bench bench_trig_kprobe;
 extern const struct bench bench_trig_kretprobe;
 extern const struct bench bench_trig_kprobe_multi;
 extern const struct bench bench_trig_kretprobe_multi;
 extern const struct bench bench_trig_fentry;
 extern const struct bench bench_trig_fexit;
-extern const struct bench bench_trig_fentry_sleep;
-extern const struct bench bench_trig_fmodret;
-
-/* batched, staying mostly in-kernel benchmarks */
-extern const struct bench bench_trig_kernel_count;
-extern const struct bench bench_trig_kprobe_batch;
-extern const struct bench bench_trig_kretprobe_batch;
-extern const struct bench bench_trig_kprobe_multi_batch;
-extern const struct bench bench_trig_kretprobe_multi_batch;
-extern const struct bench bench_trig_fentry_batch;
-extern const struct bench bench_trig_fexit_batch;
 
 /* uprobe/uretprobe benchmarks */
 extern const struct bench bench_trig_uprobe_nop;
@@ -560,24 +548,14 @@ static const struct bench *benchs[] = {
 	/* pure counting benchmarks for establishing theoretical limits */
 	&bench_trig_usermode_count,
 	&bench_trig_kernel_count,
-	/* syscall-driven triggering benchmarks */
-	&bench_trig_tp,
-	&bench_trig_rawtp,
+	&bench_trig_syscall_count,
+	/* batched, staying mostly in-kernel triggers */
 	&bench_trig_kprobe,
 	&bench_trig_kretprobe,
 	&bench_trig_kprobe_multi,
 	&bench_trig_kretprobe_multi,
 	&bench_trig_fentry,
 	&bench_trig_fexit,
-	&bench_trig_fentry_sleep,
-	&bench_trig_fmodret,
-	/* batched, staying mostly in-kernel triggers */
-	&bench_trig_kprobe_batch,
-	&bench_trig_kretprobe_batch,
-	&bench_trig_kprobe_multi_batch,
-	&bench_trig_kretprobe_multi_batch,
-	&bench_trig_fentry_batch,
-	&bench_trig_fexit_batch,
 	/* uprobes */
 	&bench_trig_uprobe_nop,
 	&bench_trig_uretprobe_nop,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 21fa1a84ecd2..567aad2d6f09 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -94,24 +94,17 @@ static void trigger_validate(void)
 	}
 }
 
-static void *trigger_base_producer(void *input)
-{
-	while (true) {
-		(void)syscall(__NR_getpgid);
-		inc_counter(base_hits);
-	}
-	return NULL;
-}
-
-static void trigger_base_measure(struct bench_res *res)
-{
-	res->hits = sum_and_reset_counters(base_hits);
-}
-
 static void *trigger_producer(void *input)
 {
-	while (true)
-		(void)syscall(__NR_getpgid);
+	if (ctx.usermode_counters) {
+		while (true) {
+			(void)syscall(__NR_getpgid);
+			inc_counter(base_hits);
+		}
+	} else {
+		while (true)
+			(void)syscall(__NR_getpgid);
+	}
 	return NULL;
 }
 
@@ -165,16 +158,17 @@ static void attach_bpf(struct bpf_program *prog)
 	}
 }
 
-static void trigger_tp_setup(void)
+static void trigger_syscall_count_setup(void)
 {
-	setup_ctx();
-	attach_bpf(ctx.skel->progs.bench_trigger_tp);
+	ctx.usermode_counters = true;
 }
 
-static void trigger_rawtp_setup(void)
+/* Batched, staying mostly in-kernel triggering setups */
+static void trigger_kernel_count_setup(void)
 {
 	setup_ctx();
-	attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
+	/* override driver program */
+	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
 }
 
 static void trigger_kprobe_setup(void)
@@ -213,62 +207,6 @@ static void trigger_fexit_setup(void)
 	attach_bpf(ctx.skel->progs.bench_trigger_fexit);
 }
 
-static void trigger_fentry_sleep_setup(void)
-{
-	setup_ctx();
-	attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
-}
-
-static void trigger_fmodret_setup(void)
-{
-	setup_ctx();
-	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
-}
-
-/* Batched, staying mostly in-kernel triggering setups */
-static void trigger_kernel_count_setup(void)
-{
-	setup_ctx();
-	/* override driver program */
-	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
-}
-
-static void trigger_kprobe_batch_setup(void)
-{
-	setup_ctx();
-	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_batch);
-}
-
-static void trigger_kretprobe_batch_setup(void)
-{
-	setup_ctx();
-	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_batch);
-}
-
-static void trigger_kprobe_multi_batch_setup(void)
-{
-	setup_ctx();
-	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi_batch);
-}
-
-static void trigger_kretprobe_multi_batch_setup(void)
-{
-	setup_ctx();
-	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi_batch);
-}
-
-static void trigger_fentry_batch_setup(void)
-{
-	setup_ctx();
-	attach_bpf(ctx.skel->progs.bench_trigger_fentry_batch);
-}
-
-static void trigger_fexit_batch_setup(void)
-{
-	setup_ctx();
-	attach_bpf(ctx.skel->progs.bench_trigger_fexit_batch);
-}
-
 /* make sure call is not inlined and not avoided by compiler, so __weak and
  * inline asm volatile in the body of the function
  *
@@ -393,109 +331,10 @@ static void uretprobe_ret_setup(void)
 	usetup(true, &uprobe_target_ret);
 }
 
-const struct bench bench_trig_base = {
-	.name = "trig-base",
-	.validate = trigger_validate,
-	.producer_thread = trigger_base_producer,
-	.measure = trigger_base_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_tp = {
-	.name = "trig-tp",
-	.validate = trigger_validate,
-	.setup = trigger_tp_setup,
-	.producer_thread = trigger_producer,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_rawtp = {
-	.name = "trig-rawtp",
-	.validate = trigger_validate,
-	.setup = trigger_rawtp_setup,
-	.producer_thread = trigger_producer,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_kprobe = {
-	.name = "trig-kprobe",
-	.validate = trigger_validate,
-	.setup = trigger_kprobe_setup,
-	.producer_thread = trigger_producer,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_kretprobe = {
-	.name = "trig-kretprobe",
-	.validate = trigger_validate,
-	.setup = trigger_kretprobe_setup,
-	.producer_thread = trigger_producer,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_kprobe_multi = {
-	.name = "trig-kprobe-multi",
-	.validate = trigger_validate,
-	.setup = trigger_kprobe_multi_setup,
-	.producer_thread = trigger_producer,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_kretprobe_multi = {
-	.name = "trig-kretprobe-multi",
-	.validate = trigger_validate,
-	.setup = trigger_kretprobe_multi_setup,
-	.producer_thread = trigger_producer,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fentry = {
-	.name = "trig-fentry",
-	.validate = trigger_validate,
-	.setup = trigger_fentry_setup,
-	.producer_thread = trigger_producer,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fexit = {
-	.name = "trig-fexit",
-	.validate = trigger_validate,
-	.setup = trigger_fexit_setup,
-	.producer_thread = trigger_producer,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fentry_sleep = {
-	.name = "trig-fentry-sleep",
-	.validate = trigger_validate,
-	.setup = trigger_fentry_sleep_setup,
-	.producer_thread = trigger_producer,
-	.measure = trigger_measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fmodret = {
-	.name = "trig-fmodret",
+const struct bench bench_trig_syscall_count = {
+	.name = "trig-syscall-count",
 	.validate = trigger_validate,
-	.setup = trigger_fmodret_setup,
+	.setup = trigger_syscall_count_setup,
 	.producer_thread = trigger_producer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
@@ -503,7 +342,7 @@ const struct bench bench_trig_fmodret = {
 };
 
 /* batched (staying mostly in kernel) kprobe/fentry benchmarks */
-#define BENCH_TRIG_BATCH(KIND, NAME)					\
+#define BENCH_TRIG_KERNEL(KIND, NAME)					\
 const struct bench bench_trig_##KIND = {				\
 	.name = "trig-" NAME,						\
 	.setup = trigger_##KIND##_setup,				\
@@ -514,13 +353,13 @@ const struct bench bench_trig_##KIND = {				\
 	.argp = &bench_trigger_batch_argp,				\
 }
 
-BENCH_TRIG_BATCH(kernel_count, "kernel-count");
-BENCH_TRIG_BATCH(kprobe_batch, "kprobe-batch");
-BENCH_TRIG_BATCH(kretprobe_batch, "kretprobe-batch");
-BENCH_TRIG_BATCH(kprobe_multi_batch, "kprobe-multi-batch");
-BENCH_TRIG_BATCH(kretprobe_multi_batch, "kretprobe-multi-batch");
-BENCH_TRIG_BATCH(fentry_batch, "fentry-batch");
-BENCH_TRIG_BATCH(fexit_batch, "fexit-batch");
+BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
+BENCH_TRIG_KERNEL(kprobe, "kprobe");
+BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
+BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
+BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
+BENCH_TRIG_KERNEL(fentry, "fentry");
+BENCH_TRIG_KERNEL(fexit, "fexit");
 
 /* uprobe benchmarks */
 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME)			\
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index b58ec33ea18c..6e790e294260 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -4,12 +4,9 @@ set -eufo pipefail
 
 def_tests=( \
 	usermode-count kernel-count syscall-count \
-	fentry-batch fexit-batch \
-	kprobe-batch kprobe-multi-batch \
-	kretprobe-batch kretprobe-multi-batch \
-	fentry fexit fmodret \
-	rawtp tp \
-	kprobe kprobe-multi kretprobe kretprobe-multi \
+	fentry fexit \
+	kprobe kprobe-multi \
+	kretprobe kretprobe-multi \
 )
 
 tests=("$@")
@@ -21,5 +18,5 @@ p=${PROD_CNT:-1}
 
 for t in "${tests[@]}"; do
 	summary=$(sudo ./bench -w2 -d5 -a -p$p trig-$t | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
-	printf "%-21s: %s\n" $t "$summary"
+	printf "%-15s: %s\n" $t "$summary"
 done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index 36021d243397..af169f831f2f 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
 
 set -eufo pipefail
 
-for i in usermode-count base {uprobe,uretprobe}-{nop,push,ret}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret}
 do
 	summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
 	printf "%-15s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index f0b76afa5017..81990e45b547 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -25,77 +25,6 @@ static __always_inline void inc_counter(void)
 	__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
 }
 
-SEC("tp/syscalls/sys_enter_getpgid")
-int bench_trigger_tp(void *ctx)
-{
-	inc_counter();
-	return 0;
-}
-
-SEC("raw_tp/sys_enter")
-int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
-{
-	if (id == __NR_getpgid)
-		inc_counter();
-	return 0;
-}
-
-SEC("kprobe/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_kprobe(void *ctx)
-{
-	inc_counter();
-	return 0;
-}
-
-SEC("kretprobe/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_kretprobe(void *ctx)
-{
-	inc_counter();
-	return 0;
-}
-
-SEC("kprobe.multi/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_kprobe_multi(void *ctx)
-{
-	inc_counter();
-	return 0;
-}
-
-SEC("kretprobe.multi/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_kretprobe_multi(void *ctx)
-{
-	inc_counter();
-	return 0;
-}
-
-SEC("fentry/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_fentry(void *ctx)
-{
-	inc_counter();
-	return 0;
-}
-
-SEC("fexit/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_fexit(void *ctx)
-{
-	inc_counter();
-	return 0;
-}
-
-SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_fentry_sleep(void *ctx)
-{
-	inc_counter();
-	return 0;
-}
-
-SEC("fmod_ret/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_fmodret(void *ctx)
-{
-	inc_counter();
-	return -22;
-}
-
 SEC("uprobe")
 int bench_trigger_uprobe(void *ctx)
 {
@@ -128,42 +57,42 @@ int trigger_driver(void *ctx)
 }
 
 SEC("kprobe/bpf_get_numa_node_id")
-int bench_trigger_kprobe_batch(void *ctx)
+int bench_trigger_kprobe(void *ctx)
 {
 	inc_counter();
 	return 0;
 }
 
 SEC("kretprobe/bpf_get_numa_node_id")
-int bench_trigger_kretprobe_batch(void *ctx)
+int bench_trigger_kretprobe(void *ctx)
 {
 	inc_counter();
 	return 0;
 }
 
 SEC("kprobe.multi/bpf_get_numa_node_id")
-int bench_trigger_kprobe_multi_batch(void *ctx)
+int bench_trigger_kprobe_multi(void *ctx)
 {
 	inc_counter();
 	return 0;
 }
 
 SEC("kretprobe.multi/bpf_get_numa_node_id")
-int bench_trigger_kretprobe_multi_batch(void *ctx)
+int bench_trigger_kretprobe_multi(void *ctx)
 {
 	inc_counter();
 	return 0;
 }
 
 SEC("fentry/bpf_get_numa_node_id")
-int bench_trigger_fentry_batch(void *ctx)
+int bench_trigger_fentry(void *ctx)
 {
 	inc_counter();
 	return 0;
 }
 
 SEC("fexit/bpf_get_numa_node_id")
-int bench_trigger_fexit_batch(void *ctx)
+int bench_trigger_fexit(void *ctx)
 {
 	inc_counter();
 	return 0;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 4/6] selftests/bpf: lazy-load trigger bench BPF programs
  2024-03-22  0:00 [PATCH bpf-next 0/6] bench: fast in-kernel triggering benchmarks Andrii Nakryiko
                   ` (2 preceding siblings ...)
  2024-03-22  0:00 ` [PATCH bpf-next 3/6] selftests/bpf: remove syscall-driven benchs, keep syscall-count only Andrii Nakryiko
@ 2024-03-22  0:00 ` Andrii Nakryiko
  2024-03-22  0:00 ` [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection Andrii Nakryiko
  2024-03-22  0:00 ` [PATCH bpf-next 6/6] selftests/bpf: add batched tp/raw_tp/fmodret tests Andrii Nakryiko
  5 siblings, 0 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-22  0:00 UTC (permalink / raw)
  To: bpf, ast, daniel, martin.lau; +Cc: andrii, kernel-team

Instead of front-loading all possible benchmarking BPF programs for
trigger benchmarks, explicitly specify which BPF programs are used by
a specific benchmark and load only those.

This allows the bench tool to be more flexible in supporting older
kernels, where some program types might be impossible to load (e.g.,
those that rely on a newly added kfunc).
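
The pattern, condensed from the diff below: programs get a '?' prefix
in their SEC() definitions (disabling autoload by default), and each
benchmark's setup enables just the programs it needs between skeleton
open and load:

  ctx.skel = trigger_bench__open();
  bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
  err = trigger_bench__load(ctx.skel); /* loads only enabled progs */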

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 .../selftests/bpf/benchs/bench_trigger.c      | 36 +++++++++++++++++--
 .../selftests/bpf/progs/trigger_bench.c       | 18 +++++-----
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 567aad2d6f09..e454d7de2252 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -128,8 +128,6 @@ static void trigger_measure(struct bench_res *res)
 
 static void setup_ctx(void)
 {
-	int err;
-
 	setup_libbpf();
 
 	ctx.skel = trigger_bench__open();
@@ -138,7 +136,15 @@ static void setup_ctx(void)
 		exit(1);
 	}
 
+	/* default "driver" BPF program */
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
+
 	ctx.skel->rodata->batch_iters = args.batch_iters;
+}
+
+static void load_ctx(void)
+{
+	int err;
 
 	err = trigger_bench__load(ctx.skel);
 	if (err) {
@@ -167,6 +173,9 @@ static void trigger_syscall_count_setup(void)
 static void trigger_kernel_count_setup(void)
 {
 	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+	bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
+	load_ctx();
 	/* override driver program */
 	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
 }
@@ -174,36 +183,48 @@ static void trigger_kernel_count_setup(void)
 static void trigger_kprobe_setup(void)
 {
 	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
+	load_ctx();
 	attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
 }
 
 static void trigger_kretprobe_setup(void)
 {
 	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
+	load_ctx();
 	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
 }
 
 static void trigger_kprobe_multi_setup(void)
 {
 	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
+	load_ctx();
 	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
 }
 
 static void trigger_kretprobe_multi_setup(void)
 {
 	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
+	load_ctx();
 	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
 }
 
 static void trigger_fentry_setup(void)
 {
 	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
+	load_ctx();
 	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
 }
 
 static void trigger_fexit_setup(void)
 {
 	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
+	load_ctx();
 	attach_bpf(ctx.skel->progs.bench_trigger_fexit);
 }
 
@@ -274,15 +295,24 @@ static void usetup(bool use_retprobe, void *target_addr)
 {
 	size_t uprobe_offset;
 	struct bpf_link *link;
+	int err;
 
 	setup_libbpf();
 
-	ctx.skel = trigger_bench__open_and_load();
+	ctx.skel = trigger_bench__open();
 	if (!ctx.skel) {
 		fprintf(stderr, "failed to open skeleton\n");
 		exit(1);
 	}
 
+	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
+
+	err = trigger_bench__load(ctx.skel);
+	if (err) {
+		fprintf(stderr, "failed to load skeleton\n");
+		exit(1);
+	}
+
 	uprobe_offset = get_uprobe_offset(target_addr);
 	link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
 					  use_retprobe,
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 81990e45b547..07587cb3c9f5 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -25,7 +25,7 @@ static __always_inline void inc_counter(void)
 	__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
 }
 
-SEC("uprobe")
+SEC("?uprobe")
 int bench_trigger_uprobe(void *ctx)
 {
 	inc_counter();
@@ -34,7 +34,7 @@ int bench_trigger_uprobe(void *ctx)
 
 const volatile int batch_iters = 0;
 
-SEC("raw_tp")
+SEC("?raw_tp")
 int trigger_count(void *ctx)
 {
 	int i;
@@ -45,7 +45,7 @@ int trigger_count(void *ctx)
 	return 0;
 }
 
-SEC("raw_tp")
+SEC("?raw_tp")
 int trigger_driver(void *ctx)
 {
 	int i;
@@ -56,42 +56,42 @@ int trigger_driver(void *ctx)
 	return 0;
 }
 
-SEC("kprobe/bpf_get_numa_node_id")
+SEC("?kprobe/bpf_get_numa_node_id")
 int bench_trigger_kprobe(void *ctx)
 {
 	inc_counter();
 	return 0;
 }
 
-SEC("kretprobe/bpf_get_numa_node_id")
+SEC("?kretprobe/bpf_get_numa_node_id")
 int bench_trigger_kretprobe(void *ctx)
 {
 	inc_counter();
 	return 0;
 }
 
-SEC("kprobe.multi/bpf_get_numa_node_id")
+SEC("?kprobe.multi/bpf_get_numa_node_id")
 int bench_trigger_kprobe_multi(void *ctx)
 {
 	inc_counter();
 	return 0;
 }
 
-SEC("kretprobe.multi/bpf_get_numa_node_id")
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
 int bench_trigger_kretprobe_multi(void *ctx)
 {
 	inc_counter();
 	return 0;
 }
 
-SEC("fentry/bpf_get_numa_node_id")
+SEC("?fentry/bpf_get_numa_node_id")
 int bench_trigger_fentry(void *ctx)
 {
 	inc_counter();
 	return 0;
 }
 
-SEC("fexit/bpf_get_numa_node_id")
+SEC("?fexit/bpf_get_numa_node_id")
 int bench_trigger_fexit(void *ctx)
 {
 	inc_counter();
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection
  2024-03-22  0:00 [PATCH bpf-next 0/6] bench: fast in-kernel triggering benchmarks Andrii Nakryiko
                   ` (3 preceding siblings ...)
  2024-03-22  0:00 ` [PATCH bpf-next 4/6] selftests/bpf: lazy-load trigger bench BPF programs Andrii Nakryiko
@ 2024-03-22  0:00 ` Andrii Nakryiko
  2024-03-22 13:12   ` Jiri Olsa
  2024-03-25 17:36   ` Alexei Starovoitov
  2024-03-22  0:00 ` [PATCH bpf-next 6/6] selftests/bpf: add batched tp/raw_tp/fmodret tests Andrii Nakryiko
  5 siblings, 2 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-22  0:00 UTC (permalink / raw)
  To: bpf, ast, daniel, martin.lau; +Cc: andrii, kernel-team

Add a simple bpf_test_tp() kfunc, available to all program types, that
is useful for various testing and benchmarking scenarios: it allows
triggering most tracing BPF program types from the BPF side, enabling
complex testing and benchmarking setups.
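
A minimal BPF-side sketch (patch #6 uses this exact pattern, wrapped
in a batched loop):

  extern int bpf_test_tp(int nonce) __ksym __weak;

  SEC("raw_tp")
  int trigger_driver_kfunc(void *ctx)
  {
          return bpf_test_tp(0); /* fires the bpf_test tracepoint */
  }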

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 kernel/bpf/bpf_test.h | 34 ++++++++++++++++++++++++++++++++++
 kernel/bpf/helpers.c  | 13 +++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 kernel/bpf/bpf_test.h

diff --git a/kernel/bpf/bpf_test.h b/kernel/bpf/bpf_test.h
new file mode 100644
index 000000000000..c779927d3574
--- /dev/null
+++ b/kernel/bpf/bpf_test.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bpf_test
+
+#if !defined(_TRACE_BPF_TEST_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_BPF_TEST_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(bpf_test,
+
+	TP_PROTO(int nonce),
+
+	TP_ARGS(nonce),
+
+	TP_STRUCT__entry(
+		__field(int, nonce)
+	),
+
+	TP_fast_assign(
+		__entry->nonce = nonce;
+	),
+
+	TP_printk("nonce %d", __entry->nonce)
+);
+
+#endif /* _TRACE_BPF_TEST_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE bpf_test
+
+#include <trace/define_trace.h>
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9234174ccb21..67bf9659c447 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -26,6 +26,10 @@
 
 #include "../../lib/kstrtox.h"
 
+#define CREATE_TRACE_POINTS
+#include "bpf_test.h"
+
+
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
  * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
@@ -2549,6 +2553,14 @@ __bpf_kfunc void bpf_throw(u64 cookie)
 	WARN(1, "A call to BPF exception callback should never return\n");
 }
 
+__bpf_kfunc int bpf_test_tp(int nonce)
+{
+	trace_bpf_test(nonce);
+
+	return nonce;
+}
+ALLOW_ERROR_INJECTION(bpf_test_tp, ERRNO);
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
@@ -2625,6 +2637,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null)
 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
 BTF_ID_FLAGS(func, bpf_dynptr_size)
 BTF_ID_FLAGS(func, bpf_dynptr_clone)
+BTF_ID_FLAGS(func, bpf_test_tp)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 6/6] selftests/bpf: add batched tp/raw_tp/fmodret tests
  2024-03-22  0:00 [PATCH bpf-next 0/6] bench: fast in-kernel triggering benchmarks Andrii Nakryiko
                   ` (4 preceding siblings ...)
  2024-03-22  0:00 ` [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection Andrii Nakryiko
@ 2024-03-22  0:00 ` Andrii Nakryiko
  5 siblings, 0 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-22  0:00 UTC (permalink / raw)
  To: bpf, ast, daniel, martin.lau; +Cc: andrii, kernel-team

Utilize the bpf_test_tp() kfunc as a fast way to trigger
tp/raw_tp/fmodret programs from another BPF program, which gives us
batched benchmarks comparable to the (batched) kprobe/fentry ones.

We don't switch the kprobe/fentry batched benchmarks to this kfunc, so
that the bench tool remains usable on older kernels as well.
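
E.g., on a kernel with bpf_test_tp(), the new benchmarks can be run
with:

$ benchs/run_bench_trigger.sh tp rawtp fmodret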

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/testing/selftests/bpf/bench.c           |  6 +++
 .../selftests/bpf/benchs/bench_trigger.c      | 39 +++++++++++++++++++
 .../selftests/bpf/benchs/run_bench_trigger.sh |  3 +-
 .../selftests/bpf/progs/trigger_bench.c       | 34 ++++++++++++++++
 4 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 8b16841a3dec..82de56c8162e 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -506,6 +506,9 @@ extern const struct bench bench_trig_kprobe_multi;
 extern const struct bench bench_trig_kretprobe_multi;
 extern const struct bench bench_trig_fentry;
 extern const struct bench bench_trig_fexit;
+extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_trig_tp;
+extern const struct bench bench_trig_rawtp;
 
 /* uprobe/uretprobe benchmarks */
 extern const struct bench bench_trig_uprobe_nop;
@@ -556,6 +559,9 @@ static const struct bench *benchs[] = {
 	&bench_trig_kretprobe_multi,
 	&bench_trig_fentry,
 	&bench_trig_fexit,
+	&bench_trig_fmodret,
+	&bench_trig_tp,
+	&bench_trig_rawtp,
 	/* uprobes */
 	&bench_trig_uprobe_nop,
 	&bench_trig_uretprobe_nop,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index e454d7de2252..5c4b8987ccbd 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -228,6 +228,42 @@ static void trigger_fexit_setup(void)
 	attach_bpf(ctx.skel->progs.bench_trigger_fexit);
 }
 
+static void trigger_fmodret_setup(void)
+{
+	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
+	load_ctx();
+	/* override driver program */
+	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
+}
+
+static void trigger_tp_setup(void)
+{
+	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
+	load_ctx();
+	/* override driver program */
+	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+	attach_bpf(ctx.skel->progs.bench_trigger_tp);
+}
+
+static void trigger_rawtp_setup(void)
+{
+	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
+	load_ctx();
+	/* override driver program */
+	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+	attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
+}
+
 /* make sure call is not inlined and not avoided by compiler, so __weak and
  * inline asm volatile in the body of the function
  *
@@ -390,6 +426,9 @@ BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
 BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
 BENCH_TRIG_KERNEL(fentry, "fentry");
 BENCH_TRIG_KERNEL(fexit, "fexit");
+BENCH_TRIG_KERNEL(fmodret, "fmodret");
+BENCH_TRIG_KERNEL(tp, "tp");
+BENCH_TRIG_KERNEL(rawtp, "rawtp");
 
 /* uprobe benchmarks */
 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME)			\
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index 6e790e294260..a690f5a68b6b 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -4,7 +4,8 @@ set -eufo pipefail
 
 def_tests=( \
 	usermode-count kernel-count syscall-count \
-	fentry fexit \
+	fentry fexit fmodret \
+	rawtp tp \
 	kprobe kprobe-multi \
 	kretprobe kretprobe-multi \
 )
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 07587cb3c9f5..914ed625441a 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -56,6 +56,19 @@ int trigger_driver(void *ctx)
 	return 0;
 }
 
+extern int bpf_test_tp(int nonce) __ksym __weak;
+
+SEC("?raw_tp")
+int trigger_driver_kfunc(void *ctx)
+{
+	int i;
+
+	for (i = 0; i < batch_iters; i++)
+		(void)bpf_test_tp(0); /* attach point for benchmarking */
+
+	return 0;
+}
+
 SEC("?kprobe/bpf_get_numa_node_id")
 int bench_trigger_kprobe(void *ctx)
 {
@@ -97,3 +110,24 @@ int bench_trigger_fexit(void *ctx)
 	inc_counter();
 	return 0;
 }
+
+SEC("?fmod_ret/bpf_test_tp")
+int bench_trigger_fmodret(void *ctx)
+{
+	inc_counter();
+	return -22;
+}
+
+SEC("?tp/bpf_test/bpf_test")
+int bench_trigger_tp(void *ctx)
+{
+	inc_counter();
+	return 0;
+}
+
+SEC("?raw_tp/bpf_test")
+int bench_trigger_rawtp(void *ctx)
+{
+	inc_counter();
+	return 0;
+}
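
(For context: inc_counter(), used by all of the programs above, is the
bench's per-CPU hit counter. In the existing trigger_bench.c it is
roughly the following -- a sketch for readers without the full file,
not part of this diff:)

static __always_inline void inc_counter(void)
{
	int cpu = bpf_get_smp_processor_id();

	__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
}
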
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection
  2024-03-22  0:00 ` [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection Andrii Nakryiko
@ 2024-03-22 13:12   ` Jiri Olsa
  2024-03-22 16:52     ` Andrii Nakryiko
  2024-03-25 17:36   ` Alexei Starovoitov
  1 sibling, 1 reply; 16+ messages in thread
From: Jiri Olsa @ 2024-03-22 13:12 UTC (permalink / raw
  To: Andrii Nakryiko; +Cc: bpf, ast, daniel, martin.lau, kernel-team

On Thu, Mar 21, 2024 at 05:00:40PM -0700, Andrii Nakryiko wrote:
> Add a simple bpf_test_tp() kfunc, available to all program types, that
> is useful for various testing and benchmarking scenarios: it allows
> triggering most tracing BPF program types from the BPF side, enabling
> complex testing and benchmarking setups.
> 
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

I'm getting a compilation failure with this one:

  INSTALL libsubcmd_headers
  INSTALL libsubcmd_headers
  CALL    scripts/checksyscalls.sh
  CC      kernel/bpf/helpers.o
In file included from kernel/bpf/bpf_test.h:34,
                 from kernel/bpf/helpers.c:30:
./include/trace/define_trace.h:95:42: fatal error: ./bpf_test.h: No such file or directory
   95 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
      |                                          ^

jirka

> ---
>  kernel/bpf/bpf_test.h | 34 ++++++++++++++++++++++++++++++++++
>  kernel/bpf/helpers.c  | 13 +++++++++++++
>  2 files changed, 47 insertions(+)
>  create mode 100644 kernel/bpf/bpf_test.h
> 
> diff --git a/kernel/bpf/bpf_test.h b/kernel/bpf/bpf_test.h
> new file mode 100644
> index 000000000000..c779927d3574
> --- /dev/null
> +++ b/kernel/bpf/bpf_test.h
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM bpf_test
> +
> +#if !defined(_TRACE_BPF_TEST_H) || defined(TRACE_HEADER_MULTI_READ)
> +
> +#define _TRACE_BPF_TEST_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(bpf_test,
> +
> +	TP_PROTO(int nonce),
> +
> +	TP_ARGS(nonce),
> +
> +	TP_STRUCT__entry(
> +		__field(int, nonce)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->nonce = nonce;
> +	),
> +
> +	TP_printk("nonce %d", __entry->nonce)
> +);
> +
> +#endif /* _TRACE_BPF_TEST_H */
> +
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH .
> +#define TRACE_INCLUDE_FILE bpf_test
> +
> +#include <trace/define_trace.h>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 9234174ccb21..67bf9659c447 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -26,6 +26,10 @@
>  
>  #include "../../lib/kstrtox.h"
>  
> +#define CREATE_TRACE_POINTS
> +#include "bpf_test.h"
> +
> +
>  /* If kernel subsystem is allowing eBPF programs to call this function,
>   * inside its own verifier_ops->get_func_proto() callback it should return
>   * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
> @@ -2549,6 +2553,14 @@ __bpf_kfunc void bpf_throw(u64 cookie)
>  	WARN(1, "A call to BPF exception callback should never return\n");
>  }
>  
> +__bpf_kfunc int bpf_test_tp(int nonce)
> +{
> +	trace_bpf_test(nonce);
> +
> +	return nonce;
> +}
> +ALLOW_ERROR_INJECTION(bpf_test_tp, ERRNO);
> +
>  __bpf_kfunc_end_defs();
>  
>  BTF_KFUNCS_START(generic_btf_ids)
> @@ -2625,6 +2637,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null)
>  BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
>  BTF_ID_FLAGS(func, bpf_dynptr_size)
>  BTF_ID_FLAGS(func, bpf_dynptr_clone)
> +BTF_ID_FLAGS(func, bpf_test_tp)
>  BTF_KFUNCS_END(common_btf_ids)
>  
>  static const struct btf_kfunc_id_set common_kfunc_set = {
> -- 
> 2.43.0
> 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection
  2024-03-22 13:12   ` Jiri Olsa
@ 2024-03-22 16:52     ` Andrii Nakryiko
  0 siblings, 0 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-22 16:52 UTC (permalink / raw
  To: Jiri Olsa; +Cc: Andrii Nakryiko, bpf, ast, daniel, martin.lau, kernel-team

On Fri, Mar 22, 2024 at 6:12 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Thu, Mar 21, 2024 at 05:00:40PM -0700, Andrii Nakryiko wrote:
> > Add a simple bpf_test_tp() kfunc, available to all program types, that
> > is useful for various testing and benchmarking scenarios: it allows
> > triggering most tracing BPF program types from the BPF side, enabling
> > complex testing and benchmarking setups.
> >
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
>
> I'm getting a compilation failure with this one:
>
>   INSTALL libsubcmd_headers
>   INSTALL libsubcmd_headers
>   CALL    scripts/checksyscalls.sh
>   CC      kernel/bpf/helpers.o
> In file included from kernel/bpf/bpf_test.h:34,
>                  from kernel/bpf/helpers.c:30:
> ./include/trace/define_trace.h:95:42: fatal error: ./bpf_test.h: No such file or directory
>    95 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
>       |                                          ^
>

I can't repro even with a clean build. CI also doesn't see any problem ([0]).

Maybe we need to have

#include "trace_probe.h"
#include "trace.h"

Not sure, can you please try?

I tried to do everything just like we do in
kernel/trace/bpf_trace.c with trace_bpf_trace_printk().


  [0] https://github.com/kernel-patches/bpf/actions/runs/8383349803
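
(A possible explanation for the setup-dependent failure, added as a
hedged note: define_trace.h resolves TRACE_INCLUDE_PATH "." against the
include search path rather than the directory of the .c file, so that
directory normally has to be added explicitly. Sketched below; the
Makefile detail is from memory and should be verified:)

/*
 * kernel/trace/Makefile carries (if memory serves):
 *
 *     CFLAGS_bpf_trace.o := -I$(src)
 *
 * which is what lets define_trace.h's
 *     #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 * find bpf_trace.h next to bpf_trace.c. kernel/bpf has no equivalent
 * flag, which could explain why only some builds of this patch fail.
 */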


> jirka
>
> > ---
> >  kernel/bpf/bpf_test.h | 34 ++++++++++++++++++++++++++++++++++
> >  kernel/bpf/helpers.c  | 13 +++++++++++++
> >  2 files changed, 47 insertions(+)
> >  create mode 100644 kernel/bpf/bpf_test.h
> >
> > diff --git a/kernel/bpf/bpf_test.h b/kernel/bpf/bpf_test.h
> > new file mode 100644
> > index 000000000000..c779927d3574
> > --- /dev/null
> > +++ b/kernel/bpf/bpf_test.h
> > @@ -0,0 +1,34 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM bpf_test
> > +
> > +#if !defined(_TRACE_BPF_TEST_H) || defined(TRACE_HEADER_MULTI_READ)
> > +
> > +#define _TRACE_BPF_TEST_H
> > +
> > +#include <linux/tracepoint.h>
> > +
> > +TRACE_EVENT(bpf_test,
> > +
> > +     TP_PROTO(int nonce),
> > +
> > +     TP_ARGS(nonce),
> > +
> > +     TP_STRUCT__entry(
> > +             __field(int, nonce)
> > +     ),
> > +
> > +     TP_fast_assign(
> > +             __entry->nonce = nonce;
> > +     ),
> > +
> > +     TP_printk("nonce %d", __entry->nonce)
> > +);
> > +
> > +#endif /* _TRACE_BPF_TEST_H */
> > +
> > +#undef TRACE_INCLUDE_PATH
> > +#define TRACE_INCLUDE_PATH .
> > +#define TRACE_INCLUDE_FILE bpf_test
> > +
> > +#include <trace/define_trace.h>
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index 9234174ccb21..67bf9659c447 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -26,6 +26,10 @@
> >
> >  #include "../../lib/kstrtox.h"
> >
> > +#define CREATE_TRACE_POINTS
> > +#include "bpf_test.h"
> > +
> > +
> >  /* If kernel subsystem is allowing eBPF programs to call this function,
> >   * inside its own verifier_ops->get_func_proto() callback it should return
> >   * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
> > @@ -2549,6 +2553,14 @@ __bpf_kfunc void bpf_throw(u64 cookie)
> >       WARN(1, "A call to BPF exception callback should never return\n");
> >  }
> >
> > +__bpf_kfunc int bpf_test_tp(int nonce)
> > +{
> > +     trace_bpf_test(nonce);
> > +
> > +     return nonce;
> > +}
> > +ALLOW_ERROR_INJECTION(bpf_test_tp, ERRNO);
> > +
> >  __bpf_kfunc_end_defs();
> >
> >  BTF_KFUNCS_START(generic_btf_ids)
> > @@ -2625,6 +2637,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null)
> >  BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
> >  BTF_ID_FLAGS(func, bpf_dynptr_size)
> >  BTF_ID_FLAGS(func, bpf_dynptr_clone)
> > +BTF_ID_FLAGS(func, bpf_test_tp)
> >  BTF_KFUNCS_END(common_btf_ids)
> >
> >  static const struct btf_kfunc_id_set common_kfunc_set = {
> > --
> > 2.43.0
> >
> >

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/6] selftests/bpf: add batched, mostly in-kernel BPF triggering benchmarks
  2024-03-22  0:00 ` [PATCH bpf-next 2/6] selftests/bpf: add batched, mostly in-kernel BPF triggering benchmarks Andrii Nakryiko
@ 2024-03-25 17:21   ` Alexei Starovoitov
  2024-03-25 23:21     ` Andrii Nakryiko
  0 siblings, 1 reply; 16+ messages in thread
From: Alexei Starovoitov @ 2024-03-25 17:21 UTC (permalink / raw
  To: Andrii Nakryiko
  Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Martin KaFai Lau,
	Kernel Team, Jiri Olsa

On Thu, Mar 21, 2024 at 5:00 PM Andrii Nakryiko <andrii@kernel.org> wrote:
> +
> +static error_t parse_arg(int key, char *arg, struct argp_state *state)
> +{
> +       long ret;
> +
> +       switch (key) {
> +       case ARG_TRIG_BATCH_ITERS:
> +               ret = strtol(arg, NULL, 10);
> +               if (ret < 1 || ret > UINT_MAX) {
> +                       fprintf(stderr, "invalid --trig-batch-iters value");
> +                       argp_usage(state);
> +               }

...

> +const volatile int batch_iters = 0;
> +
> +SEC("raw_tp")
> +int trigger_count(void *ctx)
> +{
> +       int i;
> +
> +       for (i = 0; i < batch_iters; i++)
> +               inc_counter();

so it's passing the verifier due to the bounded loop logic, right?

Then let's limit the cmd line arg to something small instead of
UINT_MAX. Otherwise users will be surprised by the errors coming from
--trig-batch-iters 500k.
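
(For readers: batch_iters is a 'const volatile' .rodata variable that
userspace sets through the skeleton before load, so the verifier walks
the loop with a concrete bound; a value like 500k would then blow past
the ~1M verified-instruction budget, which is where the surprising
errors would come from. Roughly, on the userspace side -- the env field
name is an assumption:)

/* before loading the skeleton: */
ctx.skel->rodata->batch_iters = env.trig_batch_iters;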

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection
  2024-03-22  0:00 ` [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection Andrii Nakryiko
  2024-03-22 13:12   ` Jiri Olsa
@ 2024-03-25 17:36   ` Alexei Starovoitov
  2024-03-25 22:19     ` Andrii Nakryiko
  1 sibling, 1 reply; 16+ messages in thread
From: Alexei Starovoitov @ 2024-03-25 17:36 UTC (permalink / raw
  To: Andrii Nakryiko
  Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Martin KaFai Lau,
	Kernel Team

On Thu, Mar 21, 2024 at 5:01 PM Andrii Nakryiko <andrii@kernel.org> wrote:
>
> Add a simple bpf_test_tp() kfunc, available to all program types, that
> is useful for various testing and benchmarking scenarios: it allows
> triggering most tracing BPF program types from the BPF side, enabling
> complex testing and benchmarking setups.
>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  kernel/bpf/bpf_test.h | 34 ++++++++++++++++++++++++++++++++++
>  kernel/bpf/helpers.c  | 13 +++++++++++++
>  2 files changed, 47 insertions(+)
>  create mode 100644 kernel/bpf/bpf_test.h
>
> diff --git a/kernel/bpf/bpf_test.h b/kernel/bpf/bpf_test.h
> new file mode 100644
> index 000000000000..c779927d3574
> --- /dev/null
> +++ b/kernel/bpf/bpf_test.h
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM bpf_test
> +
> +#if !defined(_TRACE_BPF_TEST_H) || defined(TRACE_HEADER_MULTI_READ)
> +
> +#define _TRACE_BPF_TEST_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(bpf_test,
> +
> +       TP_PROTO(int nonce),
> +
> +       TP_ARGS(nonce),
> +
> +       TP_STRUCT__entry(
> +               __field(int, nonce)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->nonce = nonce;
> +       ),
> +
> +       TP_printk("nonce %d", __entry->nonce)
> +);
> +
> +#endif /* _TRACE_BPF_TEST_H */
> +
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH .
> +#define TRACE_INCLUDE_FILE bpf_test
> +
> +#include <trace/define_trace.h>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 9234174ccb21..67bf9659c447 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -26,6 +26,10 @@
>
>  #include "../../lib/kstrtox.h"
>
> +#define CREATE_TRACE_POINTS
> +#include "bpf_test.h"
> +
> +
>  /* If kernel subsystem is allowing eBPF programs to call this function,
>   * inside its own verifier_ops->get_func_proto() callback it should return
>   * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
> @@ -2549,6 +2553,14 @@ __bpf_kfunc void bpf_throw(u64 cookie)
>         WARN(1, "A call to BPF exception callback should never return\n");
>  }
>
> +__bpf_kfunc int bpf_test_tp(int nonce)
> +{
> +       trace_bpf_test(nonce);
> +
> +       return nonce;
> +}
> +ALLOW_ERROR_INJECTION(bpf_test_tp, ERRNO);

There is a better way with register_btf_fmodret_id_set().
Error injection can be disabled in config.
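
(For reference, the registration pattern already used in
net/bpf/test_run.c looks roughly like this -- a sketch from memory, not
a diff:)

BTF_KFUNCS_START(bpf_test_modify_return_ids)
BTF_ID_FLAGS(func, bpf_modify_return_test)
BTF_KFUNCS_END(bpf_test_modify_return_ids)

static const struct btf_kfunc_id_set bpf_test_modify_return_set = {
	.owner = THIS_MODULE,
	.set   = &bpf_test_modify_return_ids,
};

/* registered from an __init function: */
err = register_btf_fmodret_id_set(&bpf_test_modify_return_set);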

Also, we have two such test funcs already:
bpf_modify_return_test*
and they can be exercised by bpf_prog_test_run_tracing.
Let's make it recognize the 'repeat' argument and call
bpf_modify_return_test or a 3rd such func multiple times?
Exercising the test tracepoint can happen there as well.
Asking a bpf prog to call a kfunc that calls a tracepoint
looks like an extra hop.
The existing test_run facility should be able to accommodate this.

I still don't get how modifying the kernel is better than
adding all of that to a kernel module.
When you want to run the bench tool on a different server
you need to either build/copy bpf_testmod or
build/copy the whole kernel.
But fine.
All the test things in net/bpf/test_run.c will stay.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection
  2024-03-25 17:36   ` Alexei Starovoitov
@ 2024-03-25 22:19     ` Andrii Nakryiko
  2024-03-26  1:43       ` Andrii Nakryiko
  0 siblings, 1 reply; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-25 22:19 UTC (permalink / raw
  To: Alexei Starovoitov
  Cc: Andrii Nakryiko, bpf, Alexei Starovoitov, Daniel Borkmann,
	Martin KaFai Lau, Kernel Team

On Mon, Mar 25, 2024 at 10:37 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, Mar 21, 2024 at 5:01 PM Andrii Nakryiko <andrii@kernel.org> wrote:
> >
> > Add a simple bpf_test_tp() kfunc, available to all program types, that
> > is useful for various testing and benchmarking scenarios: it allows
> > triggering most tracing BPF program types from the BPF side, enabling
> > complex testing and benchmarking setups.
> >
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  kernel/bpf/bpf_test.h | 34 ++++++++++++++++++++++++++++++++++
> >  kernel/bpf/helpers.c  | 13 +++++++++++++
> >  2 files changed, 47 insertions(+)
> >  create mode 100644 kernel/bpf/bpf_test.h
> >
> > diff --git a/kernel/bpf/bpf_test.h b/kernel/bpf/bpf_test.h
> > new file mode 100644
> > index 000000000000..c779927d3574
> > --- /dev/null
> > +++ b/kernel/bpf/bpf_test.h
> > @@ -0,0 +1,34 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM bpf_test
> > +
> > +#if !defined(_TRACE_BPF_TEST_H) || defined(TRACE_HEADER_MULTI_READ)
> > +
> > +#define _TRACE_BPF_TEST_H
> > +
> > +#include <linux/tracepoint.h>
> > +
> > +TRACE_EVENT(bpf_test,
> > +
> > +       TP_PROTO(int nonce),
> > +
> > +       TP_ARGS(nonce),
> > +
> > +       TP_STRUCT__entry(
> > +               __field(int, nonce)
> > +       ),
> > +
> > +       TP_fast_assign(
> > +               __entry->nonce = nonce;
> > +       ),
> > +
> > +       TP_printk("nonce %d", __entry->nonce)
> > +);
> > +
> > +#endif /* _TRACE_BPF_TEST_H */
> > +
> > +#undef TRACE_INCLUDE_PATH
> > +#define TRACE_INCLUDE_PATH .
> > +#define TRACE_INCLUDE_FILE bpf_test
> > +
> > +#include <trace/define_trace.h>
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index 9234174ccb21..67bf9659c447 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -26,6 +26,10 @@
> >
> >  #include "../../lib/kstrtox.h"
> >
> > +#define CREATE_TRACE_POINTS
> > +#include "bpf_test.h"
> > +
> > +
> >  /* If kernel subsystem is allowing eBPF programs to call this function,
> >   * inside its own verifier_ops->get_func_proto() callback it should return
> >   * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
> > @@ -2549,6 +2553,14 @@ __bpf_kfunc void bpf_throw(u64 cookie)
> >         WARN(1, "A call to BPF exception callback should never return\n");
> >  }
> >
> > +__bpf_kfunc int bpf_test_tp(int nonce)
> > +{
> > +       trace_bpf_test(nonce);
> > +
> > +       return nonce;
> > +}
> > +ALLOW_ERROR_INJECTION(bpf_test_tp, ERRNO);
>
> There is a better way with register_btf_fmodret_id_set().
> Error injection can be disabled in config.

ok, didn't know about it

>
> Also we have two such test funcs already:
> bpf_modify_return_test*
> and they can be exercised by bpf_prog_test_run_tracing.
> Let's make it recognize 'repeat' argument and call
> bpf_modify_return_test or 3rd such func multiple times ?

I'll add bpf_modify_return_test_tp() so as not to touch all the tests
using bpf_modify_return_test() and bpf_modify_return_test2(), if that's
ok. The existing ones expect some memory pointer, dereference it, etc.;
it seems cleaner to have a dedicated tp-triggering one for this.

> Exercise of test tracepoint can be there as well.
> Asking bpf prog to call a kfunc to call a tracepoint
> looks like extra hop.
> Existing test_run facility should be able to accommodate.

You mean if I add bpf_modify_return_test_tp() above, I should pass an
argument of how many times that tracepoint should be triggered? Or you
mean to use test_run's repeat argument to trigger "driver program" N
times, and the driver program would just call
bpf_modify_return_test_tp() once? If the latter, I'm not sure it's the
same as calling the driver program once and doing a loop inside, as
we'll measure more of calling driver program overhead (N times vs 1
time right now, per each N tp/fmod_ret calls).
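
(Concretely, the test_run route would be driven from userspace roughly
like this, assuming bpf_prog_test_run_tracing learned to honor .repeat
as proposed above -- a sketch of that proposal, not of this series:)

LIBBPF_OPTS(bpf_test_run_opts, opts,
	.repeat = 1000000,	/* kernel would re-run the prog N times */
);
err = bpf_prog_test_run_opts(
	bpf_program__fd(skel->progs.bench_trigger_fmodret), &opts);
/* opts.duration would then report the average ns per run */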

(But tbh, not having to use test_run's repeat functionality is a
benefit, IMO: we can have more flexible counting/timing code and
whatever else we might need. I'm not sure why using test_run's repeat
would be advantageous here.)

Not sure what you are trying to optimize for here, please clarify.

>
> I still don't get it how modifying the kernel is better than
> adding all that to a kernel module.
> When you want to run bench tool on a different server
> you either need to build/copy bpf_testmod or
> build/copy the whole kernel.
> But fine.

I'm optimizing for having a single pre-built bench binary that one can
scp to multiple different (potentially custom-built) kernels and run
benchmarks. Of course, on old kernels that don't yet have this new
bpf_modify_return_test_tp() kfunc some of the benchmarks won't work
(raw_tp/fmod_ret/tp only), but that's fine; as newer kernels get
deployed we'll eventually get them even in production. kprobe/fentry
benchmarks will work as is (which is why I'm not switching them to the
new kfunc, so we can run them on a wider variety of kernels).
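
(The single-binary portability leans on the __weak ksym extern shown in
the diffs: programs referencing a missing kfunc simply aren't loaded.
A runtime-probing variant of the same idea, using bpf_ksym_exists()
from bpf_helpers.h, would look roughly like this -- an illustration,
not what the series does:)

extern int bpf_modify_return_test_tp(int nonce) __ksym __weak;

SEC("?raw_tp")
int trigger_driver_kfunc_checked(void *ctx)
{
	int i;

	/* on kernels without the kfunc, the weak ksym resolves to NULL */
	if (!bpf_ksym_exists(bpf_modify_return_test_tp))
		return 0;

	for (i = 0; i < batch_iters; i++)
		(void)bpf_modify_return_test_tp(0);

	return 0;
}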

> All the test things in net/bpf/test_run.c will stay.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/6] selftests/bpf: add batched, mostly in-kernel BPF triggering benchmarks
  2024-03-25 17:21   ` Alexei Starovoitov
@ 2024-03-25 23:21     ` Andrii Nakryiko
  0 siblings, 0 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-25 23:21 UTC (permalink / raw
  To: Alexei Starovoitov
  Cc: Andrii Nakryiko, bpf, Alexei Starovoitov, Daniel Borkmann,
	Martin KaFai Lau, Kernel Team, Jiri Olsa

On Mon, Mar 25, 2024 at 10:21 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, Mar 21, 2024 at 5:00 PM Andrii Nakryiko <andrii@kernel.org> wrote:
> > +
> > +static error_t parse_arg(int key, char *arg, struct argp_state *state)
> > +{
> > +       long ret;
> > +
> > +       switch (key) {
> > +       case ARG_TRIG_BATCH_ITERS:
> > +               ret = strtol(arg, NULL, 10);
> > +               if (ret < 1 || ret > UINT_MAX) {
> > +                       fprintf(stderr, "invalid --trig-batch-iters value");
> > +                       argp_usage(state);
> > +               }
>
> ...
>
> > +const volatile int batch_iters = 0;
> > +
> > +SEC("raw_tp")
> > +int trigger_count(void *ctx)
> > +{
> > +       int i;
> > +
> > +       for (i = 0; i < batch_iters; i++)
> > +               inc_counter();
>
> so it's passing the verifier due to bounded loop logic, right?

yep

>
> Then let's limit cmd line arg to something small instead of UINT_MAX.
> Otherwise users will be surprised by the errors
> coming from --trig-batch-iters 500k

sure, I don't think it makes much difference beyond 100-200; I tried
500 and it made no difference. So I can limit it to 500 or 1000.
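
(Sketched, the validation would then become -- the exact cap and the
env field name are assumptions:)

case ARG_TRIG_BATCH_ITERS:
	ret = strtol(arg, NULL, 10);
	if (ret < 1 || ret > 1000) {
		fprintf(stderr, "invalid --trig-batch-iters value (should be in [1, 1000])\n");
		argp_usage(state);
	}
	env.trig_batch_iters = ret;
	break;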

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection
  2024-03-25 22:19     ` Andrii Nakryiko
@ 2024-03-26  1:43       ` Andrii Nakryiko
  2024-03-26  2:32         ` Alexei Starovoitov
  2024-03-26 10:57         ` Jiri Olsa
  0 siblings, 2 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2024-03-26  1:43 UTC (permalink / raw
  To: Alexei Starovoitov
  Cc: Andrii Nakryiko, bpf, Alexei Starovoitov, Daniel Borkmann,
	Martin KaFai Lau, Kernel Team

On Mon, Mar 25, 2024 at 3:19 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Mar 25, 2024 at 10:37 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Thu, Mar 21, 2024 at 5:01 PM Andrii Nakryiko <andrii@kernel.org> wrote:
> > >
> > > Add a simple bpf_test_tp() kfunc, available to all program types, that
> > > is useful for various testing and benchmarking scenarios: it allows
> > > triggering most tracing BPF program types from the BPF side, enabling
> > > complex testing and benchmarking setups.
> > >
> > > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > > ---
> > >  kernel/bpf/bpf_test.h | 34 ++++++++++++++++++++++++++++++++++
> > >  kernel/bpf/helpers.c  | 13 +++++++++++++
> > >  2 files changed, 47 insertions(+)
> > >  create mode 100644 kernel/bpf/bpf_test.h
> > >
> > > diff --git a/kernel/bpf/bpf_test.h b/kernel/bpf/bpf_test.h
> > > new file mode 100644
> > > index 000000000000..c779927d3574
> > > --- /dev/null
> > > +++ b/kernel/bpf/bpf_test.h
> > > @@ -0,0 +1,34 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#undef TRACE_SYSTEM
> > > +#define TRACE_SYSTEM bpf_test
> > > +
> > > +#if !defined(_TRACE_BPF_TEST_H) || defined(TRACE_HEADER_MULTI_READ)
> > > +
> > > +#define _TRACE_BPF_TEST_H
> > > +
> > > +#include <linux/tracepoint.h>
> > > +
> > > +TRACE_EVENT(bpf_test,
> > > +
> > > +       TP_PROTO(int nonce),
> > > +
> > > +       TP_ARGS(nonce),
> > > +
> > > +       TP_STRUCT__entry(
> > > +               __field(int, nonce)
> > > +       ),
> > > +
> > > +       TP_fast_assign(
> > > +               __entry->nonce = nonce;
> > > +       ),
> > > +
> > > +       TP_printk("nonce %d", __entry->nonce)
> > > +);
> > > +
> > > +#endif /* _TRACE_BPF_TEST_H */
> > > +
> > > +#undef TRACE_INCLUDE_PATH
> > > +#define TRACE_INCLUDE_PATH .
> > > +#define TRACE_INCLUDE_FILE bpf_test
> > > +
> > > +#include <trace/define_trace.h>
> > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > > index 9234174ccb21..67bf9659c447 100644
> > > --- a/kernel/bpf/helpers.c
> > > +++ b/kernel/bpf/helpers.c
> > > @@ -26,6 +26,10 @@
> > >
> > >  #include "../../lib/kstrtox.h"
> > >
> > > +#define CREATE_TRACE_POINTS
> > > +#include "bpf_test.h"
> > > +
> > > +
> > >  /* If kernel subsystem is allowing eBPF programs to call this function,
> > >   * inside its own verifier_ops->get_func_proto() callback it should return
> > >   * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
> > > @@ -2549,6 +2553,14 @@ __bpf_kfunc void bpf_throw(u64 cookie)
> > >         WARN(1, "A call to BPF exception callback should never return\n");
> > >  }
> > >
> > > +__bpf_kfunc int bpf_test_tp(int nonce)
> > > +{
> > > +       trace_bpf_test(nonce);
> > > +
> > > +       return nonce;
> > > +}
> > > +ALLOW_ERROR_INJECTION(bpf_test_tp, ERRNO);
> >
> > There is a better way with register_btf_fmodret_id_set().
> > Error injection can be disabled in config.
>
> ok, didn't know about it
>
> >
> > Also we have two such test funcs already:
> > bpf_modify_return_test*
> > and they can be exercised by bpf_prog_test_run_tracing.
> > Let's make it recognize 'repeat' argument and call
> > bpf_modify_return_test or 3rd such func multiple times ?
>
> I'll add bpf_modify_return_test_tp() to not touch all the tests using
> bpf_modify_return_test() and bpf_modify_return_test2(), if that's ok.
> Existing ones expect some memory pointer, dereference it, etc, it
> seems cleaner to have a dedicated tp-triggering one for this.
>
> > Exercise of test tracepoint can be there as well.
> > Asking bpf prog to call a kfunc to call a tracepoint
> > looks like extra hop.
> > Existing test_run facility should be able to accommodate.
>
> You mean if I add bpf_modify_return_test_tp() above, I should pass an
> argument of how many times that tracepoint should be triggered? Or you
> mean to use test_run's repeat argument to trigger "driver program" N
> times, and the driver program would just call
> bpf_modify_return_test_tp() once? If the latter, I'm not sure it's the
> same as calling the driver program once and doing a loop inside, as
> we'll measure more of calling driver program overhead (N times vs 1
> time right now, per each N tp/fmod_ret calls).
>
> (But tbh, not having to use test_run's repeat functionality is a
> benefit, IMO, we can have more flexible counting/timing code and
> whatever else we might need, I'm not sure why using test_run's repeat
> is advantageous here)
>
> Not sure what you are trying to optimize for here, please clarify.
>

So I currently have these changes. I moved the tp into bpf_test_run.h
(didn't know we had that; this should eliminate the issue that Jiri
saw as well). I moved the kfunc into net/bpf/test_run.c and renamed it
to bpf_modify_return_test_tp(), but kept it referenced in "common
kfuncs", so that raw_tp and others can call it (it doesn't seem like I
need an extern declaration for it).

I just didn't touch the test_run functionality. All of this works in
the bench tool, which is nice.

diff --git a/include/trace/events/bpf_test_run.h b/include/trace/events/bpf_test_run.h
index 265447e3f71a..0c924d39b7cb 100644
--- a/include/trace/events/bpf_test_run.h
+++ b/include/trace/events/bpf_test_run.h
@@ -7,6 +7,23 @@

 #include <linux/tracepoint.h>

+TRACE_EVENT(bpf_trigger_tp,
+
+       TP_PROTO(int nonce),
+
+       TP_ARGS(nonce),
+
+       TP_STRUCT__entry(
+               __field(int, nonce)
+       ),
+
+       TP_fast_assign(
+               __entry->nonce = nonce;
+       ),
+
+       TP_printk("nonce %d", __entry->nonce)
+);
+
 DECLARE_EVENT_CLASS(bpf_test_finish,

        TP_PROTO(int *err),
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 67bf9659c447..a85dc684450a 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2553,14 +2553,6 @@ __bpf_kfunc void bpf_throw(u64 cookie)
        WARN(1, "A call to BPF exception callback should never return\n");
 }

-__bpf_kfunc int bpf_test_tp(int nonce)
-{
-       trace_bpf_test(nonce);
-
-       return nonce;
-}
-ALLOW_ERROR_INJECTION(bpf_test_tp, ERRNO);
-
 __bpf_kfunc_end_defs();

 BTF_KFUNCS_START(generic_btf_ids)
@@ -2637,7 +2629,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null)
 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
 BTF_ID_FLAGS(func, bpf_dynptr_size)
 BTF_ID_FLAGS(func, bpf_dynptr_clone)
-BTF_ID_FLAGS(func, bpf_test_tp)
+BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
 BTF_KFUNCS_END(common_btf_ids)

 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 61efeadaff8d..f6aad4ed2ab2 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -575,6 +575,13 @@ __bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d,
        return a + *b + c + d + (long)e + f + g;
 }

+__bpf_kfunc int bpf_modify_return_test_tp(int nonce)
+{
+       trace_bpf_trigger_tp(nonce);
+
+       return nonce;
+}
+
 int noinline bpf_fentry_shadow_test(int a)
 {
        return a + 1;
@@ -622,6 +629,7 @@ __bpf_kfunc_end_defs();
 BTF_KFUNCS_START(bpf_test_modify_return_ids)
 BTF_ID_FLAGS(func, bpf_modify_return_test)
 BTF_ID_FLAGS(func, bpf_modify_return_test2)
+BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
 BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
 BTF_KFUNCS_END(bpf_test_modify_return_ids)

diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 914ed625441a..2619ed193c65 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -56,7 +56,7 @@ int trigger_driver(void *ctx)
        return 0;
 }

-extern int bpf_test_tp(int nonce) __ksym __weak;
+extern int bpf_modify_return_test_tp(int nonce) __ksym __weak;

 SEC("?raw_tp")
 int trigger_driver_kfunc(void *ctx)
@@ -64,7 +64,7 @@ int trigger_driver_kfunc(void *ctx)
        int i;

        for (i = 0; i < batch_iters; i++)
-               (void)bpf_test_tp(0); /* attach point for benchmarking */
+               (void)bpf_modify_return_test_tp(0); /* attach point for benchmarking */

        return 0;
 }
@@ -111,21 +111,21 @@ int bench_trigger_fexit(void *ctx)
        return 0;
 }

-SEC("?fmod_ret/bpf_test_tp")
+SEC("?fmod_ret/bpf_modify_return_test_tp")
 int bench_trigger_fmodret(void *ctx)
 {
        inc_counter();
        return -22;
 }

-SEC("?tp/bpf_test/bpf_test")
+SEC("?tp/bpf_test_run/bpf_trigger_tp")
 int bench_trigger_tp(void *ctx)
 {
        inc_counter();
        return 0;
 }

-SEC("?raw_tp/bpf_test")
+SEC("?raw_tp/bpf_trigger_tp")
 int bench_trigger_rawtp(void *ctx)
 {
        inc_counter();

> >
> > I still don't get it how modifying the kernel is better than
> > adding all that to a kernel module.
> > When you want to run bench tool on a different server
> > you either need to build/copy bpf_testmod or
> > build/copy the whole kernel.
> > But fine.
>
> I optimize for having a single pre-built bench binary that one can scp
> to multiple different (potentially custom built) kernels and run
> benchmarks. Of course on old kernels where we don't yet have this new
> bpf_modify_return_test_tp() kfunc some of benchmarks won't work
> (raw_tp/fmod_ret/tp only), but that's fine, as newer kernels get
> deployed we'll eventually get them even in production. kprobe/fentry
> benchmark will work as is (which is why I'm not switching them to new
> kfunc, so we can run them on a wider variety of kernels).
>
> > All the test things in net/bpf/test_run.c will stay.

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection
  2024-03-26  1:43       ` Andrii Nakryiko
@ 2024-03-26  2:32         ` Alexei Starovoitov
  2024-03-26 10:57         ` Jiri Olsa
  1 sibling, 0 replies; 16+ messages in thread
From: Alexei Starovoitov @ 2024-03-26  2:32 UTC (permalink / raw
  To: Andrii Nakryiko
  Cc: Andrii Nakryiko, bpf, Alexei Starovoitov, Daniel Borkmann,
	Martin KaFai Lau, Kernel Team

On Mon, Mar 25, 2024 at 6:43 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> So I currently have these changes. I moved tp into bpf_test_run.h

...

> +__bpf_kfunc int bpf_modify_return_test_tp(int nonce)
> +{
> +       trace_bpf_trigger_tp(nonce);
> +
> +       return nonce;
> +}

Looks good to me.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection
  2024-03-26  1:43       ` Andrii Nakryiko
  2024-03-26  2:32         ` Alexei Starovoitov
@ 2024-03-26 10:57         ` Jiri Olsa
  1 sibling, 0 replies; 16+ messages in thread
From: Jiri Olsa @ 2024-03-26 10:57 UTC (permalink / raw
  To: Andrii Nakryiko
  Cc: Alexei Starovoitov, Andrii Nakryiko, bpf, Alexei Starovoitov,
	Daniel Borkmann, Martin KaFai Lau, Kernel Team

On Mon, Mar 25, 2024 at 06:43:29PM -0700, Andrii Nakryiko wrote:

SNIP

> > I'll add bpf_modify_return_test_tp() to not touch all the tests using
> > bpf_modify_return_test() and bpf_modify_return_test2(), if that's ok.
> > Existing ones expect some memory pointer, dereference it, etc, it
> > seems cleaner to have a dedicated tp-triggering one for this.
> >
> > > Exercise of test tracepoint can be there as well.
> > > Asking bpf prog to call a kfunc to call a tracepoint
> > > looks like extra hop.
> > > Existing test_run facility should be able to accommodate.
> >
> > You mean if I add bpf_modify_return_test_tp() above, I should pass an
> > argument of how many times that tracepoint should be triggered? Or you
> > mean to use test_run's repeat argument to trigger "driver program" N
> > times, and the driver program would just call
> > bpf_modify_return_test_tp() once? If the latter, I'm not sure it's the
> > same as calling the driver program once and doing a loop inside, as
> > we'll measure more of calling driver program overhead (N times vs 1
> > time right now, per each N tp/fmod_ret calls).
> >
> > (But tbh, not having to use test_run's repeat functionality is a
> > benefit, IMO, we can have more flexible counting/timing code and
> > whatever else we might need, I'm not sure why using test_run's repeat
> > is advantageous here)
> >
> > Not sure what you are trying to optimize for here, please clarify.
> >
> 
> So I currently have these changes. I moved tp into bpf_test_run.h
> (didn't know we have that, this should eliminate the issue that Jiri
> saw as well). Moved kfunc into net/bpf/test_run.c and renamed it to

sorry, I did not get to it yet... will test the new version

jirka

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread

Thread overview: 16+ messages
2024-03-22  0:00 [PATCH bpf-next 0/6] bench: fast in-kernel triggering benchmarks Andrii Nakryiko
2024-03-22  0:00 ` [PATCH bpf-next 1/6] selftests/bpf: rename and clean up userspace-triggered benchmarks Andrii Nakryiko
2024-03-22  0:00 ` [PATCH bpf-next 2/6] selftests/bpf: add batched, mostly in-kernel BPF triggering benchmarks Andrii Nakryiko
2024-03-25 17:21   ` Alexei Starovoitov
2024-03-25 23:21     ` Andrii Nakryiko
2024-03-22  0:00 ` [PATCH bpf-next 3/6] selftests/bpf: remove syscall-driven benchs, keep syscall-count only Andrii Nakryiko
2024-03-22  0:00 ` [PATCH bpf-next 4/6] selftests/bpf: lazy-load trigger bench BPF programs Andrii Nakryiko
2024-03-22  0:00 ` [PATCH bpf-next 5/6] bpf: add bpf_test_tp() kfunc triggering tp and allowing error injection Andrii Nakryiko
2024-03-22 13:12   ` Jiri Olsa
2024-03-22 16:52     ` Andrii Nakryiko
2024-03-25 17:36   ` Alexei Starovoitov
2024-03-25 22:19     ` Andrii Nakryiko
2024-03-26  1:43       ` Andrii Nakryiko
2024-03-26  2:32         ` Alexei Starovoitov
2024-03-26 10:57         ` Jiri Olsa
2024-03-22  0:00 ` [PATCH bpf-next 6/6] selftests/bpf: add batched tp/raw_tp/fmodret tests Andrii Nakryiko
