[PATCH 0/4] CSV-related improvements

mwrap (Perl version) user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed

* [PATCH 0/4] CSV-related improvements
@ 2023-01-11  1:12 Eric Wong
  2023-01-11  1:12 ` [PATCH 1/4] support MWRAP=dump_csv:$FILENAME parameter Eric Wong
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-11  1:12 UTC (permalink / raw)
  To: mwrap-perl

It should've been CSV from the beginning...

Eric Wong (4):
  support MWRAP=dump_csv:$FILENAME parameter
  add mwrap-decode-csv tool
  %p => PID expansion for dump_path + dump_csv
  rewrite README and update manpage to favor CSV

 MANIFEST                  |   1 +
 Makefile.PL               |   3 +-
 Mwrap.xs                  |   5 +
 README                    |  61 ++++++-----
 httpd.h                   | 217 ++++++++++++++++++++------------------
 lib/Devel/Mwrap/Rproxy.pm |  33 +++---
 mwrap_core.h              |  50 +++++++--
 script/mwrap-decode-csv   |  57 ++++++++++
 script/mwrap-perl         |  18 +++-
 t/mwrap.t                 |  36 ++++++-
 10 files changed, 327 insertions(+), 154 deletions(-)
 create mode 100644 script/mwrap-decode-csv

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/4] support MWRAP=dump_csv:$FILENAME parameter
  2023-01-11  1:12 [PATCH 0/4] CSV-related improvements Eric Wong
@ 2023-01-11  1:12 ` Eric Wong
  2023-01-11  1:12 ` [PATCH 2/4] add mwrap-decode-csv tool Eric Wong
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-11  1:12 UTC (permalink / raw)
  To: mwrap-perl

Just reusing code from httpd.
---
 httpd.h           | 217 ++++++++++++++++++++++++----------------------
 mwrap_core.h      |  35 ++++++--
 script/mwrap-perl |  12 ++-
 t/mwrap.t         |  21 ++++-
 4 files changed, 173 insertions(+), 112 deletions(-)

diff --git a/httpd.h b/httpd.h
index ef4d83c..9219d36 100644
--- a/httpd.h
+++ b/httpd.h
@@ -504,9 +504,11 @@ static off_t write_loc_name(FILE *fp, const struct src_loc *l)
 	return end - beg;
 }
 
-static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc, FILE *lp)
+static struct h1_src_loc *
+accumulate(struct mw_fbuf *lb, unsigned long min, size_t *hslc)
 {
 	struct mw_fbuf fb;
+	if (!fbuf_init(lb)) return NULL;
 	if (!fbuf_init(&fb)) return NULL;
 	rcu_read_lock();
 	struct cds_lfht *t = CMM_LOAD_SHARED(totals);
@@ -528,18 +530,23 @@ static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc, FILE *lp)
 			HUGE_VAL;
 		hsl.max_life = uatomic_read(&l->max_lifespan);
 		hsl.sl = l;
-		hsl.lname_len = write_loc_name(lp, l);
+		hsl.lname_len = write_loc_name(lb->fp, l);
 		fwrite(&hsl, sizeof(hsl), 1, fb.fp);
 	}
 	rcu_read_unlock();
 
-	struct h1_src_loc *hslv;
-	if (fbuf_close(&fb)) {
-		hslv = NULL;
-	} else {
-		*hslc = fb.len / sizeof(*hslv);
-		mwrap_assert((fb.len % sizeof(*hslv)) == 0);
-		hslv = (struct h1_src_loc *)fb.ptr;
+	if (fbuf_close(&fb) || fbuf_close(lb))
+		return NULL;
+
+	struct h1_src_loc *hslv = (struct h1_src_loc *)fb.ptr;
+	*hslc = fb.len / sizeof(*hslv);
+	mwrap_assert((fb.len % sizeof(*hslv)) == 0);
+	char *n = lb->ptr;
+	for (size_t i = 0; i < *hslc; ++i) {
+		hslv[i].loc_name = n;
+		n += hslv[i].lname_len;
+		if (hslv[i].lname_len < 0)
+			return NULL;
 	}
 	return hslv;
 }
@@ -609,124 +616,128 @@ static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r)
 	return h1_200(h1, &html, TYPE_HTML);
 }
 
-/* /$PID/each/$MIN endpoint */
-static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
-				unsigned long min, bool csv)
-{
-	static const char default_sort[] = "bytes";
-	const char *sort;
-	size_t sort_len = 0;
+typedef int (*cmp_fn)(const void *, const void *);
 
-	if (!csv) {
-		sort = default_sort;
-		sort_len = sizeof(default_sort) - 1;
+static cmp_fn write_csv_header(FILE *fp, const char *sort, size_t sort_len)
+{
+	cmp_fn cmp = NULL;
+	for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+		const char *fn = fields[i].fname;
+		if (i)
+			fputc(',', fp);
+		fputs(fn, fp);
+		if (fields[i].flen == sort_len && !memcmp(fn, sort, sort_len))
+			cmp = fields[i].cmp;
 	}
+	fputc('\n', fp);
+	return cmp;
+}
 
-	if (h1r->qstr && h1r->qlen > 5 && !memcmp(h1r->qstr, "sort=", 5)) {
-		sort = h1r->qstr + 5;
-		sort_len = h1r->qlen - 5;
+static void write_csv_data(FILE *fp, struct h1_src_loc *hslv, size_t hslc)
+{
+	for (size_t i = 0; i < hslc; i++) {
+		struct h1_src_loc *hsl = &hslv[i];
+
+		fprintf(fp, "%zu,%zu,%zu,%zu,%0.3f,%zu,",
+			hsl->bytes, hsl->allocations, hsl->frees,
+			hsl->live, hsl->mean_life, hsl->max_life);
+		write_q_csv(fp, hsl->loc_name, hsl->lname_len);
+		fputc('\n', fp);
 	}
+}
 
-	size_t hslc;
+static void *write_csv(FILE *fp, size_t min, const char *sort, size_t sort_len)
+{
 	AUTO_CLOFREE struct mw_fbuf lb;
-	if (!fbuf_init(&lb)) return h1_close(h1);
-	AUTO_FREE struct h1_src_loc *hslv = accumulate(min, &hslc, lb.fp);
-	if (!hslv)
-		return h1_close(h1);
+	size_t hslc;
+	AUTO_FREE struct h1_src_loc *hslv = accumulate(&lb, min, &hslc);
+	if (!hslv) return NULL;
 
-	if (fbuf_close(&lb))
-		return h1_close(h1);
+	cmp_fn cmp = write_csv_header(fp, sort, sort_len);
+	if (cmp)
+		qsort(hslv, hslc, sizeof(*hslv), cmp);
+	write_csv_data(fp, hslv, hslc);
+	return fp;
+}
 
-	char *n = lb.ptr;
-	for (size_t i = 0; i < hslc; ++i) {
-		hslv[i].loc_name = n;
-		n += hslv[i].lname_len;
-		if (hslv[i].lname_len < 0)
-			return h1_close(h1);
+/* /$PID/each/$MIN endpoint */
+static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
+				size_t min, bool csv)
+{
+	static const char default_sort[] = "bytes";
+	const char *sort = csv ? NULL : default_sort;
+	size_t sort_len = csv ? 0 : (sizeof(default_sort) - 1);
+
+	if (h1r->qstr && h1r->qlen > 5 && !memcmp(h1r->qstr, "sort=", 5)) {
+		sort = h1r->qstr + 5;
+		sort_len = h1r->qlen - 5;
 	}
 
 	struct mw_fbuf bdy;
 	FILE *fp = wbuf_init(&bdy);
 	if (!fp) return h1_close(h1);
-
-	if (!csv) {
-		unsigned depth = (unsigned)CMM_LOAD_SHARED(bt_req_depth);
-		fprintf(fp, "<html><head><title>mwrap each &gt;%lu"
-			"</title></head><body><p>mwrap each &gt;%lu "
-			"(change `%lu' in URL to adjust filtering) - "
-			"MWRAP=bt:%u <a href=\"%lu.csv\">.csv</a>",
-			min, min, min, depth, min);
-		show_stats(fp);
-		/* need borders to distinguish multi-level traces */
-		if (depth)
-			FPUTS("<table\nborder=1><tr>", fp);
-		else /* save screen space if only tracing one line */
-			FPUTS("<table><tr>", fp);
+	if (csv) {
+		if (write_csv(fp, min, sort, sort_len))
+			return h1_200(h1, &bdy, TYPE_CSV);
+		return h1_close(h1);
 	}
 
-	int (*cmp)(const void *, const void *) = NULL;
-	if (csv) {
-		for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
-			const char *fn = fields[i].fname;
-			if (i)
-				fputc(',', fp);
-			fputs(fn, fp);
-			if (fields[i].flen == sort_len &&
-					!memcmp(fn, sort, sort_len))
-				cmp = fields[i].cmp;
-		}
-		fputc('\n', fp);
-	} else {
-		for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
-			const char *fn = fields[i].fname;
-			FPUTS("<th>", fp);
-			if (fields[i].flen == sort_len &&
-					!memcmp(fn, sort, sort_len)) {
-				cmp = fields[i].cmp;
-				fprintf(fp, "<b>%s</b>", fields[i].fname);
-			} else {
-				fprintf(fp, "<a\nhref=\"./%lu?sort=%s\">%s</a>",
-					min, fn, fn);
-			}
-			FPUTS("</th>", fp);
+	size_t hslc;
+	AUTO_CLOFREE struct mw_fbuf lb;
+	AUTO_FREE struct h1_src_loc *hslv = accumulate(&lb, min, &hslc);
+	if (!hslv)
+		return h1_close(h1);
+
+	unsigned depth = (unsigned)CMM_LOAD_SHARED(bt_req_depth);
+	fprintf(fp, "<html><head><title>mwrap each &gt;%lu"
+		"</title></head><body><p>mwrap each &gt;%lu "
+		"(change `%lu' in URL to adjust filtering) - "
+		"MWRAP=bt:%u <a href=\"%lu.csv\">.csv</a>",
+		min, min, min, depth, min);
+	show_stats(fp);
+	/* need borders to distinguish multi-level traces */
+	if (depth)
+		FPUTS("<table\nborder=1><tr>", fp);
+	else /* save screen space if only tracing one line */
+		FPUTS("<table><tr>", fp);
+	cmp_fn cmp = NULL;
+	for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+		const char *fn = fields[i].fname;
+		FPUTS("<th>", fp);
+		if (fields[i].flen == sort_len &&
+				!memcmp(fn, sort, sort_len)) {
+			cmp = fields[i].cmp;
+			fprintf(fp, "<b>%s</b>", fields[i].fname);
+		} else {
+			fprintf(fp, "<a\nhref=\"./%lu?sort=%s\">%s</a>",
+				min, fn, fn);
 		}
+		FPUTS("</th>", fp);
 	}
-	if (!csv)
-		FPUTS("</tr>", fp);
+	FPUTS("</tr>", fp);
 	if (cmp)
 		qsort(hslv, hslc, sizeof(*hslv), cmp);
-	else if (!csv)
+	else
 		FPUTS("<tr><td>sort= not understood</td></tr>", fp);
-	if (csv) {
-		for (size_t i = 0; i < hslc; i++) {
-			struct h1_src_loc *hsl = &hslv[i];
 
-			fprintf(fp, "%zu,%zu,%zu,%zu,%0.3f,%zu,",
-				hsl->bytes, hsl->allocations, hsl->frees,
-				hsl->live, hsl->mean_life, hsl->max_life);
-			write_q_csv(fp, hsl->loc_name, hsl->lname_len);
-			fputc('\n', fp);
-		}
-	} else {
-		for (size_t i = 0; i < hslc; i++) {
-			struct h1_src_loc *hsl = &hslv[i];
+	for (size_t i = 0; i < hslc; i++) {
+		struct h1_src_loc *hsl = &hslv[i];
 
-			fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
-				"<td>%zu</td><td>%0.3f</td><td>%zu</td>",
-				hsl->bytes, hsl->allocations, hsl->frees,
-				hsl->live, hsl->mean_life, hsl->max_life);
-			FPUTS("<td><a\nhref=\"../at/", fp);
+		fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
+			"<td>%zu</td><td>%0.3f</td><td>%zu</td>",
+			hsl->bytes, hsl->allocations, hsl->frees,
+			hsl->live, hsl->mean_life, hsl->max_life);
+		FPUTS("<td><a\nhref=\"../at/", fp);
 
-			write_b64_url(fp, src_loc_hash_tip(hsl->sl),
-					src_loc_hash_len(hsl->sl));
+		write_b64_url(fp, src_loc_hash_tip(hsl->sl),
+				src_loc_hash_len(hsl->sl));
 
-			FPUTS("\">", fp);
-			write_html(fp, hsl->loc_name, hsl->lname_len);
-			FPUTS("</a></td></tr>", fp);
-		}
-		FPUTS("</table></body></html>", fp);
+		FPUTS("\">", fp);
+		write_html(fp, hsl->loc_name, hsl->lname_len);
+		FPUTS("</a></td></tr>", fp);
 	}
-	return h1_200(h1, &bdy, csv ? TYPE_CSV : TYPE_HTML);
+	FPUTS("</table></body></html>", fp);
+	return h1_200(h1, &bdy, TYPE_HTML);
 }
 
 /* /$PID/ root endpoint */
@@ -781,7 +792,7 @@ static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r)
 		if ((c = PATH_SKIP(h1r, "/each/"))) {
 			errno = 0;
 			char *e;
-			unsigned long min = strtoul(c, &e, 10);
+			size_t min = (size_t)strtoul(c, &e, 10);
 			if (!errno) {
 				if (*e == ' ' || *e == '?')
 					return each_gt(h1, h1r, min, false);
diff --git a/mwrap_core.h b/mwrap_core.h
index deb3bb3..fff0538 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -732,6 +732,7 @@ enomem:
 struct dump_arg {
 	FILE *fp;
 	size_t min;
+	bool dump_csv;
 };
 
 char **bt_syms(void * const *addrlist, uint32_t size)
@@ -754,12 +755,16 @@ static void cleanup_free(void *any)
 	free(*p);
 }
 
+static void *write_csv(FILE *, size_t min, const char *sort, size_t sort_len);
 static void *dump_to_file(struct dump_arg *a)
 {
 	struct cds_lfht_iter iter;
 	struct src_loc *l;
 	struct cds_lfht *t;
 
+	if (a->dump_csv)
+		return write_csv(a->fp, a->min, NULL, 0);
+
 	++locating;
 	rcu_read_lock();
 	t = CMM_LOAD_SHARED(totals);
@@ -857,7 +862,7 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
 {
 	const char *opt = getenv("MWRAP");
 	const char *modes[] = { "a", "a+", "w", "w+", "r+" };
-	struct dump_arg a = { .min = 0 };
+	struct dump_arg a = { .min = 0, .dump_csv = false };
 	size_t i;
 	int dump_fd;
 	char *dump_path;
@@ -870,9 +875,24 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
 		return;
 
 	++locating;
-	if ((dump_path = strstr(opt, "dump_path:")) &&
-			(dump_path += sizeof("dump_path")) &&
-			*dump_path) {
+
+	/* parse dump_csv:$PATHNAME */
+	if ((dump_path = strstr(opt, "dump_csv:"))) {
+		dump_path += sizeof("dump_csv");
+		if (!*dump_path)
+			dump_path = NULL;
+		else
+			a.dump_csv = true;
+	}
+	if (!dump_path) {
+		/* parse dump_path:$PATHNAME */
+		if ((dump_path = strstr(opt, "dump_path:"))) {
+			dump_path += sizeof("dump_path");
+			if (!*dump_path)
+				dump_path = NULL;
+		}
+	}
+	if (dump_path) {
 		char *end = strchr(dump_path, ',');
 		char buf[PATH_MAX];
 		if (end) {
@@ -887,10 +907,13 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
 			fprintf(stderr, "open %s failed: %m\n", dump_path);
 			goto out;
 		}
-	}
-	else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
+	} else if ((s = strstr(opt, "dump_fd:")) &&
+			!sscanf(s, "dump_fd:%d", &dump_fd))
 		goto out;
 
+	/* allow dump_csv standalone for dump_fd */
+	if (!a.dump_csv && strstr(opt, "dump_csv"))
+		a.dump_csv = true;
 	if ((s = strstr(opt, "dump_min:")))
 		sscanf(s, "dump_min:%zu", &a.min);
 
diff --git a/script/mwrap-perl b/script/mwrap-perl
index 182b0bd..eb29176 100644
--- a/script/mwrap-perl
+++ b/script/mwrap-perl
@@ -76,12 +76,20 @@ Dumps the output at exit to a given filename:
 
 	total_bytes	call_count	location
 
-In the future, dumping to a self-describing CSV will be supported.
-
 =item dump_fd:$DESCRIPTOR
 
 As with dump_path, but dumps the output to a given file descriptor.
 
+=item dump_csv:$FILENAME
+
+Dump CSV to the given filename.
+
+This output matches the HTTP server output and includes column headers,
+but is subject to change in future releases.
+
+C<dump_csv> without the C<:> may also be used in conjunction with
+C<dump_fd>, such as C<MWRAP=dump_fd:2,dump_csv>.
+
 =back
 
 =head1 HTTP POST API
diff --git a/t/mwrap.t b/t/mwrap.t
index 6f99715..ccd739b 100644
--- a/t/mwrap.t
+++ b/t/mwrap.t
@@ -9,7 +9,8 @@ my $dump = "$mwrap_tmp/dump";
 {
 	my $env = { MWRAP => "dump_path:$dump,dump_min:10000" };
 	my $nr = 1000;
-	mwrap_run('dump test', $env, '-e', '$x = "hello world" x '.$nr);
+	my $script = '$x = "hello world" x '.$nr;
+	mwrap_run('dump test', $env, '-e', $script);
 	ok(-s $dump, "dump file written to");
 	my $s = slurp($dump);
 	truncate($dump, 0);
@@ -23,6 +24,24 @@ my $dump = "$mwrap_tmp/dump";
 	} else {
 		fail("$s failed to match $re");
 	}
+
+	$env->{MWRAP} = "dump_csv:$dump";
+	mwrap_run('dump_csv test', $env, '-e', $script);
+	ok(-s $dump, "CSV written to path");
+	$s = slurp($dump);
+	truncate($dump, 0);
+	my $nr_comma = ($s =~ tr/,/,/);
+	my $nr_cr = ($s =~ tr/\n/\n/);
+	ok($nr_comma > ($nr_cr * 4), 'CSV has more commas than CR');
+
+	$env->{MWRAP} = 'dump_csv,dump_fd:2';
+	mwrap_run('dump_csv,dump_fd test', $env, '-e', $script);
+	ok(-s $mwrap_err, "CSV written to stderr");
+	$s = slurp($mwrap_err);
+	truncate($mwrap_err, 0);
+	$nr_comma = ($s =~ tr/,/,/);
+	$nr_cr = ($s =~ tr/\n/\n/);
+	ok($nr_comma > ($nr_cr * 4), 'CSV has more commas than CR');
 }
 
 SKIP: { # C++ program which uses malloc via "new"

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/4] add mwrap-decode-csv tool
  2023-01-11  1:12 [PATCH 0/4] CSV-related improvements Eric Wong
  2023-01-11  1:12 ` [PATCH 1/4] support MWRAP=dump_csv:$FILENAME parameter Eric Wong
@ 2023-01-11  1:12 ` Eric Wong
  2023-01-11  1:12 ` [PATCH 3/4] %p => PID expansion for dump_path + dump_csv Eric Wong
  2023-01-11  1:12 ` [PATCH 4/4] rewrite README and update manpage to favor CSV Eric Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-11  1:12 UTC (permalink / raw)
  To: mwrap-perl

This is a useful companion to the dump_csv: directive.
It also fixes a bug where HTML escaping was unnecessarily done
to the CSV output by -rproxy.
---
 MANIFEST                  |  1 +
 Makefile.PL               |  3 ++-
 lib/Devel/Mwrap/Rproxy.pm | 33 +++++++++++++----------
 script/mwrap-decode-csv   | 57 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 79 insertions(+), 15 deletions(-)
 create mode 100644 script/mwrap-decode-csv

diff --git a/MANIFEST b/MANIFEST
index 096cec9..cf42979 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -20,6 +20,7 @@ mymalloc.h
 picohttpparser.h
 picohttpparser_c.h
 ppport.h
+script/mwrap-decode-csv
 script/mwrap-perl
 script/mwrap-rproxy
 t/httpd-unit.t
diff --git a/Makefile.PL b/Makefile.PL
index dadf80b..41e8f03 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -83,7 +83,8 @@ push @writemakefile_args, (
 	CCFLAGS => "$Config{ccflags} $ccflags",
 	PREREQ_PM => {},
 	ABSTRACT_FROM => 'lib/Devel/Mwrap.pm',
-	EXE_FILES => [qw(script/mwrap-perl script/mwrap-rproxy)],
+	EXE_FILES => [qw(script/mwrap-perl script/mwrap-rproxy
+		script/mwrap-decode-csv)],
 	AUTHOR => 'mwrap hackers <mwrap-perl@80x24.org>',
 	LIBS => $LIBS, # e.g. -lurcu-cds
 	LICENSE => 'gpl_2', # GPL-3.0+, CPAN::Meta::Spec limitation
diff --git a/lib/Devel/Mwrap/Rproxy.pm b/lib/Devel/Mwrap/Rproxy.pm
index d5a9d9d..de65685 100644
--- a/lib/Devel/Mwrap/Rproxy.pm
+++ b/lib/Devel/Mwrap/Rproxy.pm
@@ -11,9 +11,8 @@ package Devel::Mwrap::Rproxy;
 use v5.12; # strict
 use Fcntl qw(SEEK_SET);
 use IO::Socket::UNIX;
-use Plack::Util;
 
-sub new { bless { socket_dir => $_[1]}, $_[0] }
+sub new { require Plack::Util; bless { socket_dir => $_[1]}, $_[0] }
 
 sub r {
 	[ $_[0], [
@@ -104,13 +103,26 @@ sub a2l {
 		$a2l ? do {
 			chomp(my $line = $a2l->lookup($addr));
 			$line =~ s/\Q?? at ??:0\E//; # FreeBSD
-			$line = Plack::Util::encode_html($line);
 			$line =~ /\?\?/ ? "$line $exe($addr)" :
 				($line =~ /\S/ ? $line : "$exe($addr)");
 		} : "$exe($addr)"
 	}
 }
 
+sub decode_csv {
+	my ($in, $out) = @_;
+	while (<$in>) {
+		s/\\n/\0\0/g;
+		s!(["\0])
+			([^\("\0]+) # exe
+			\(([^\)"\0]+)\) # addr
+			(["\0])!
+			$1.a2l($2,$3).$4!gex;
+		s/\0\0/\\n/g;
+		$out->write($_);
+	}
+}
+
 sub call { # PSGI entry point
 	my ($self, $env) = @_;
 	my $uri = $env->{REQUEST_URI};
@@ -152,23 +164,16 @@ sub call { # PSGI entry point
 			local %addr2line;
 			# extract executable|library(address)
 			if ($csv) {
-				while (<$c>) {
-					s/\\n/\0\0/g;
-					s!(["\0])
-						([^\("\0]+) # exe
-						\(([^\)"\0]+)\) # addr
-						(["\0])!
-						$1.a2l($2,$3).$4!gex;
-					s/\0\0/\\n/g;
-					$http_out->write($_);
-				}
+				decode_csv($c, $http_out);
 			} else {
 				while (<$c>) {
 					s!>
 						([^\(<]+) # exe
 						\(([^\)<]+)\) # addr
 						<!
-						'>'.a2l($1,$2).'<'!gex;
+						'>'.Plack::Util::encode_html(
+							a2l($1,$2)).
+						'<'!gex;
 					$http_out->write($_);
 				}
 			}
diff --git a/script/mwrap-decode-csv b/script/mwrap-decode-csv
new file mode 100644
index 0000000..5bbc171
--- /dev/null
+++ b/script/mwrap-decode-csv
@@ -0,0 +1,57 @@
+#!perl -w
+# Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+# addr2line decoder for the output of MWRAP=dump_csv:$FILENAME
+use v5.12;
+use Devel::Mwrap::Rproxy;
+use IO::Handle;
+Devel::Mwrap::Rproxy::decode_csv(*STDIN{IO}, *STDOUT{IO});
+__END__
+=head1 NAME
+
+mwrap-decode-csv - decode non-Perl addresses from mwrap CSV dumps
+
+=head1 SYNOPSIS
+
+  MWRAP=dump_csv:$FILENAME,bt:2 mwrap-perl COMMAND...
+
+  mwrap-decode-csv <$FILENAME
+
+=head1 DESCRIPTION
+
+mwrap-decode-csv is a convenient wrapper for L<addr2line(1)>
+for decoding C backtraces from CSV files.
+
+It reads the CSV via standard input, and emits to standard output.
+
+It expects CSV files emitted by a L<mwrap-perl(1p)> via
+C<MWRAP=dump_csv:$FILENAME> or retrieved directly via C<curl --unix-socket>.
+
+It is not needed for CSVs retrieved via L<mwrap-rproxy(1p)>,
+since mwrap-rproxy already performs the same function as mwrap-decode-csv.
+
+To get useful C backtraces of Perl programs, C<MWRAP=bt:$DEPTH>
+directive must be used (carefully).  See L<mwrap-perl(1p)>.
+
+addr2line from GNU binutils 2.39+ (August 2022) is recommended to
+support C<SYMBOL+OFFSET> addresses.
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:mwrap-perl@80x24.org>
+
+Mail archives are hosted at L<https://80x24.org/mwrap-perl/>
+
+=head1 COPYRIGHT
+
+Copyright all contributors L<mailto:mwrap-perl@80x24.org>
+
+License: GPL-3.0+ L<https://www.gnu.org/licenses/gpl-3.0.txt>
+
+Source code is at L<https://80x24.org/mwrap-perl.git/>
+
+=head1 SEE ALSO
+
+L<mwrap-perl(1p)>, L<mwrap-rproxy(1)>
+
+=cut

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/4] %p => PID expansion for dump_path + dump_csv
  2023-01-11  1:12 [PATCH 0/4] CSV-related improvements Eric Wong
  2023-01-11  1:12 ` [PATCH 1/4] support MWRAP=dump_csv:$FILENAME parameter Eric Wong
  2023-01-11  1:12 ` [PATCH 2/4] add mwrap-decode-csv tool Eric Wong
@ 2023-01-11  1:12 ` Eric Wong
  2023-01-11  1:12 ` [PATCH 4/4] rewrite README and update manpage to favor CSV Eric Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-11  1:12 UTC (permalink / raw)
  To: mwrap-perl

This makes it possible to dump per-PID files for processes which
fork.  `%p' matches what the Linux sys.kernel.core_pattern
sysctl understands.
---
 mwrap_core.h      | 15 +++++++++++++++
 script/mwrap-perl |  6 ++++++
 t/mwrap.t         | 15 +++++++++++++++
 3 files changed, 36 insertions(+)

diff --git a/mwrap_core.h b/mwrap_core.h
index fff0538..86e4498 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -895,12 +895,27 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
 	if (dump_path) {
 		char *end = strchr(dump_path, ',');
 		char buf[PATH_MAX];
+		AUTO_FREE char *pid_path = NULL;
 		if (end) {
 			mwrap_assert((end - dump_path) < (intptr_t)sizeof(buf));
 			end = mempcpy(buf, dump_path, end - dump_path);
 			*end = 0;
 			dump_path = buf;
 		}
+
+		/* %p => PID expansion (Linux core_pattern uses %p, too) */
+		if ((s = strchr(dump_path, '%')) && s[1] == 'p' &&
+				/* don't allow injecting extra formats: */
+				!strchr(s + 2, '%')) {
+			s[1] = 'd'; /* s/%p/%d/ to make asprintf happy */
+			int n = asprintf(&pid_path, dump_path, (int)getpid());
+			if (n < 0)
+				fprintf(stderr,
+					"asprintf failed: %m, dumping to %s\n",
+					dump_path);
+			else
+				dump_path = pid_path;
+		}
 		dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
 				0666);
 		if (dump_fd < 0) {
diff --git a/script/mwrap-perl b/script/mwrap-perl
index eb29176..371aee6 100644
--- a/script/mwrap-perl
+++ b/script/mwrap-perl
@@ -76,6 +76,10 @@ Dumps the output at exit to a given filename:
 
 	total_bytes	call_count	location
 
+C<$FILENAME> may contain C<%p> where C<%p> is a placeholder for
+the PID being dumped.  No other use of C<%> is accepted, and
+multiple C<%> means all C<%> (including C<%p>) are handled as-is.
+
 =item dump_fd:$DESCRIPTOR
 
 As with dump_path, but dumps the output to a given file descriptor.
@@ -90,6 +94,8 @@ but is subject to change in future releases.
 C<dump_csv> without the C<:> may also be used in conjunction with
 C<dump_fd>, such as C<MWRAP=dump_fd:2,dump_csv>.
 
+Expands C<%p> to the PID in C<$FILENAME> as described for C<dump_path:>
+
 =back
 
 =head1 HTTP POST API
diff --git a/t/mwrap.t b/t/mwrap.t
index ccd739b..783f6e7 100644
--- a/t/mwrap.t
+++ b/t/mwrap.t
@@ -42,6 +42,21 @@ my $dump = "$mwrap_tmp/dump";
 	$nr_comma = ($s =~ tr/,/,/);
 	$nr_cr = ($s =~ tr/\n/\n/);
 	ok($nr_comma > ($nr_cr * 4), 'CSV has more commas than CR');
+
+	$env->{MWRAP} = "dump_path:$dump.%p";
+	mwrap_run('dump_path PID expansion', $env, '-e', $script);
+	my @d = grep(/\.\d+\z/, glob("$dump.*"));
+	is(scalar(@d), 1, 'got PID file') or diag explain([glob("$dump*")]);
+	unlink(@d) or BAIL_OUT "unlink: $!";
+
+	# don't allow injecting random formats
+	for my $fmt ('%p.%m', '%m.%p') {
+		my $fn = $dump.$fmt;
+		$env->{MWRAP} = "dump_path:$fn";
+		mwrap_run("PID expansion fails on $fmt", $env, '-e', $script);
+		ok($fn, "$fmt used as-is");
+		unlink($fn) or BAIL_OUT "unlink: $!";
+	}
 }
 
 SKIP: { # C++ program which uses malloc via "new"

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/4] rewrite README and update manpage to favor CSV
  2023-01-11  1:12 [PATCH 0/4] CSV-related improvements Eric Wong
                   ` (2 preceding siblings ...)
  2023-01-11  1:12 ` [PATCH 3/4] %p => PID expansion for dump_path + dump_csv Eric Wong
@ 2023-01-11  1:12 ` Eric Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-11  1:12 UTC (permalink / raw)
  To: mwrap-perl

The CSV format has far more info, is self-describing, and
Devel::Mwrap->dump never existed.  I'm not sure if I care to
support a Perl API now that the AF_UNIX HTTP interface exists.
---
 Mwrap.xs          |  5 ++++
 README            | 61 ++++++++++++++++++++++++++---------------------
 script/mwrap-perl | 26 ++++++++++----------
 3 files changed, 52 insertions(+), 40 deletions(-)

diff --git a/Mwrap.xs b/Mwrap.xs
index 4d4c996..cbb7d15 100644
--- a/Mwrap.xs
+++ b/Mwrap.xs
@@ -6,6 +6,11 @@
 #define MWRAP_PERL 1
 #include "mwrap_core.h"
 
+/*
+ * The Devel::Mwrap Perl API is probably no longer useful now that
+ * the AF_UNIX HTTP interface exists.  It'll probably remain undocumented.
+ */
+
 /*
  * I hate typedefs, especially when they're hiding the fact that there's
  * a pointer, but XS needs this, apparently, and it does s/__/::/g
diff --git a/README b/README
index 1fa1dfb..88abe7d 100644
--- a/README
+++ b/README
@@ -4,11 +4,17 @@ Devel::Mwrap is designed to answer the question:
 
    Which lines of Perl are hitting malloc the most?
 
-Devel::Mwrap wraps all malloc-family calls to trace the Perl source
-location of such calls and bytes allocated at each callsite.  It
-can also function as a leak detector and show live allocations
-at every call site.  Depending on your application and workload,
-the overhead is roughly a 50%-100% increase memory and runtime.
+Devel::Mwrap wraps all malloc-family calls to trace the Perl
+source location of such calls and bytes allocated at each
+callsite.  It is primarily designed to identify high memory use,
+but may function as a leak detector as it can show live
+allocations at every call site.  Depending on your application
+and workload, the overhead is roughly a 50%-100% increase memory
+and runtime.
+
+It also gives configurable backtraces of all dynamically-linked
+malloc callsites for any program where backtrace(3) works,
+including programs not linked to Perl.
 
 It is thread-safe and requires the concurrent lock-free hash table
 from the Userspace RCU project: https://liburcu.org/
@@ -31,31 +37,30 @@ See `INSTALL' document
 
 == Usage
 
-Devel::Mwrap works as an LD_PRELOAD and supplies a mwrap-perl script to
-improve ease-of-use.  You can set dump_path: in the MWRAP environment
-variable to append the results to a log file:
+Devel::Mwrap works as an LD_PRELOAD and supplies a mwrap-perl wrapper to
+improve ease-of-use.  You can set dump_csv: in the MWRAP environment
+variable to append the results to a CSV file:
 
-	MWRAP=dump_path:/path/to/log mwrap-perl PERL_COMMAND
+	MWRAP=dump_csv:/path/to/csv.%p mwrap-perl COMMAND
 
-	# And to display the locations with the most allocations:
-	sort -k1,1rn </path/to/log | $PAGER
+For long running processes, you can see the AF_UNIX HTTP interface:
 
-You may also `use Devel::Mwrap' in your Perl code and use
-Devel::Mwrap->dump, Devel::Mwrap->reset, Devel::Mwrap->each, etc.
+	MWRAP=socket_dir:/some/dir mwrap-perl COMMAND
 
-However, Devel::Mwrap MUST be loaded via LD_PRELOAD to have any
-effect in tracking malloc use.  However, it is safe to keep
-"use Devel::Mwrap" in performance-critical deployments,
-as overhead is only incurred when used as an LD_PRELOAD.
+And connect via `curl --unix-socket /some/dir/$PID.sock' or `mwrap-rproxy'.
+See mwrap-rproxy(1p) and mwrap-perl(1p) manpages for more info.
 
-The output of the Devel::Mwrap->dump is a text file with 3 columns:
+== Compared to other tools
 
-	total_bytes	call_count	location
+* mwrap-perl knows about Perl code, and an `mwrap' RubyGem exists, too
 
-Where location is a Perl source location or an address retrieved
-by backtrace_symbols(3).  It is recommended to use the sort(1)
-command on either of the first two columns to find the hottest
-malloc locations.
+* mwrap does not catch memory errors; use ASan, valgrind, or similar
+
+* mwrap is reasonably fast, fast enough for the author to run on
+  production-facing Perl daemons
+
+* the AF_UNIX HTTP interface allows inspecting live processes without
+  interruption instead of waiting for an exit dump
 
 == Known problems
 
@@ -66,6 +71,8 @@ malloc locations.
 
 * Perl source files over 16.7 million lines long are not supported :P
 
+* large C backtraces (off by default) are expensive for large programs
+
 == Public mail archives (HTTP, Atom feeds, IMAP mailbox, NNTP group, POP3):
 
 	https://80x24.org/mwrap-perl/
@@ -73,10 +80,10 @@ malloc locations.
 	nntps://80x24.org/inbox.comp.lang.perl.mwrap
 	https://80x24.org/mwrap-perl/_/text/help/#pop3
 
-No subscription nor real identities will ever be required to obtain support,
-but HTML mail is rejected.  Memory usage reductions start with you;
-only send plain-text mail to us and do not top-post.  HTML mail and
-top-posting costs everybody memory and bandwidth.
+No subscription nor real identities will ever be required to obtain
+support or contribute, HTML mail is rejected.  Memory usage reductions
+start with you; only send plain-text mail to us and do not top-post.
+HTML mail and top-posting costs everybody memory and bandwidth.
 
 		mwrap-perl@80x24.org
 
diff --git a/script/mwrap-perl b/script/mwrap-perl
index 371aee6..cf88375 100644
--- a/script/mwrap-perl
+++ b/script/mwrap-perl
@@ -70,31 +70,31 @@ This may be changed via POST request (see below).
 
 Default: 0
 
-=item dump_path:$FILENAME
+=item dump_csv:$FILENAME
 
-Dumps the output at exit to a given filename:
+Dump CSV to the given filename.
 
-	total_bytes	call_count	location
+This output matches the HTTP server output and includes column headers,
+but is subject to change in future releases.
+
+C<dump_csv> without the C<:> may also be used in conjunction with
+C<dump_fd>, such as C<MWRAP=dump_fd:2,dump_csv>.
 
 C<$FILENAME> may contain C<%p> where C<%p> is a placeholder for
 the PID being dumped.  No other use of C<%> is accepted, and
 multiple C<%> means all C<%> (including C<%p>) are handled as-is.
 
-=item dump_fd:$DESCRIPTOR
-
-As with dump_path, but dumps the output to a given file descriptor.
+=item dump_path:$FILENAME
 
-=item dump_csv:$FILENAME
+Gives a simpler, legacy output compatible with the old Ruby version:
 
-Dump CSV to the given filename.
+	total_bytes	call_count	location
 
-This output matches the HTTP server output and includes column headers,
-but is subject to change in future releases.
+Expands C<%p> to the PID in C<$FILENAME> as described for C<dump_csv>
 
-C<dump_csv> without the C<:> may also be used in conjunction with
-C<dump_fd>, such as C<MWRAP=dump_fd:2,dump_csv>.
+=item dump_fd:$DESCRIPTOR
 
-Expands C<%p> to the PID in C<$FILENAME> as described for C<dump_path:>
+As with dump_path, but dumps the output to a given file descriptor.
 
 =back
 

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-01-11  1:12 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-11  1:12 [PATCH 0/4] CSV-related improvements Eric Wong
2023-01-11  1:12 ` [PATCH 1/4] support MWRAP=dump_csv:$FILENAME parameter Eric Wong
2023-01-11  1:12 ` [PATCH 2/4] add mwrap-decode-csv tool Eric Wong
2023-01-11  1:12 ` [PATCH 3/4] %p => PID expansion for dump_path + dump_csv Eric Wong
2023-01-11  1:12 ` [PATCH 4/4] rewrite README and update manpage to favor CSV Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mwrap-perl.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).