From 8ce0068f470f3dad3a2920e7fdeedeee235c44eb Mon Sep 17 00:00:00 2001
From: Eric Wong <e@80x24.org>
Date: Wed, 11 Jan 2023 01:12:46 +0000
Subject: support MWRAP=dump_csv:$FILENAME parameter

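Factor the CSV generation out of each_gt() in httpd.h into a new
write_csv() function so that dump_to_file() can reuse it.
`dump_csv' without a filename may also be combined with dump_fd
(e.g. MWRAP=dump_fd:2,dump_csv).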
---
 httpd.h           | 217 ++++++++++++++++++++++++++++--------------------------
 mwrap_core.h      |  35 +++++++--
 script/mwrap-perl |  12 ++-
 t/mwrap.t         |  21 +++++-
 4 files changed, 173 insertions(+), 112 deletions(-)

diff --git a/httpd.h b/httpd.h
index ef4d83c..9219d36 100644
--- a/httpd.h
+++ b/httpd.h
@@ -504,9 +504,11 @@ static off_t write_loc_name(FILE *fp, const struct src_loc *l)
 	return end - beg;
 }
 
-static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc, FILE *lp)
+static struct h1_src_loc *
+accumulate(struct mw_fbuf *lb, unsigned long min, size_t *hslc)
 {
 	struct mw_fbuf fb;
+	if (!fbuf_init(lb)) return NULL;
 	if (!fbuf_init(&fb)) return NULL;
 	rcu_read_lock();
 	struct cds_lfht *t = CMM_LOAD_SHARED(totals);
@@ -528,18 +530,23 @@ static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc, FILE *lp)
 			HUGE_VAL;
 		hsl.max_life = uatomic_read(&l->max_lifespan);
 		hsl.sl = l;
-		hsl.lname_len = write_loc_name(lp, l);
+		hsl.lname_len = write_loc_name(lb->fp, l);
 		fwrite(&hsl, sizeof(hsl), 1, fb.fp);
 	}
 	rcu_read_unlock();
 
-	struct h1_src_loc *hslv;
-	if (fbuf_close(&fb)) {
-		hslv = NULL;
-	} else {
-		*hslc = fb.len / sizeof(*hslv);
-		mwrap_assert((fb.len % sizeof(*hslv)) == 0);
-		hslv = (struct h1_src_loc *)fb.ptr;
+	if (fbuf_close(&fb) || fbuf_close(lb))
+		return NULL;
+
+	struct h1_src_loc *hslv = (struct h1_src_loc *)fb.ptr;
+	*hslc = fb.len / sizeof(*hslv);
+	mwrap_assert((fb.len % sizeof(*hslv)) == 0);
+	char *n = lb->ptr;
+	for (size_t i = 0; i < *hslc; ++i) {
+		hslv[i].loc_name = n;
+		n += hslv[i].lname_len;
+		if (hslv[i].lname_len < 0)
+			return NULL;
 	}
 	return hslv;
 }
@@ -609,124 +616,128 @@ static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r)
 	return h1_200(h1, &html, TYPE_HTML);
 }
 
-/* /$PID/each/$MIN endpoint */
-static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
-				unsigned long min, bool csv)
-{
-	static const char default_sort[] = "bytes";
-	const char *sort;
-	size_t sort_len = 0;
+typedef int (*cmp_fn)(const void *, const void *);
 
-	if (!csv) {
-		sort = default_sort;
-		sort_len = sizeof(default_sort) - 1;
+static cmp_fn write_csv_header(FILE *fp, const char *sort, size_t sort_len)
+{
+	cmp_fn cmp = NULL;
+	for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+		const char *fn = fields[i].fname;
+		if (i)
+			fputc(',', fp);
+		fputs(fn, fp);
+		if (fields[i].flen == sort_len && !memcmp(fn, sort, sort_len))
+			cmp = fields[i].cmp;
 	}
+	fputc('\n', fp);
+	return cmp;
+}
 
-	if (h1r->qstr && h1r->qlen > 5 && !memcmp(h1r->qstr, "sort=", 5)) {
-		sort = h1r->qstr + 5;
-		sort_len = h1r->qlen - 5;
+static void write_csv_data(FILE *fp, struct h1_src_loc *hslv, size_t hslc)
+{
+	for (size_t i = 0; i < hslc; i++) {
+		struct h1_src_loc *hsl = &hslv[i];
+
+		fprintf(fp, "%zu,%zu,%zu,%zu,%0.3f,%zu,",
+			hsl->bytes, hsl->allocations, hsl->frees,
+			hsl->live, hsl->mean_life, hsl->max_life);
+		write_q_csv(fp, hsl->loc_name, hsl->lname_len);
+		fputc('\n', fp);
 	}
+}
 
-	size_t hslc;
+static void *write_csv(FILE *fp, size_t min, const char *sort, size_t sort_len)
+{
 	AUTO_CLOFREE struct mw_fbuf lb;
-	if (!fbuf_init(&lb)) return h1_close(h1);
-	AUTO_FREE struct h1_src_loc *hslv = accumulate(min, &hslc, lb.fp);
-	if (!hslv)
-		return h1_close(h1);
+	size_t hslc;
+	AUTO_FREE struct h1_src_loc *hslv = accumulate(&lb, min, &hslc);
+	if (!hslv) return NULL;
 
-	if (fbuf_close(&lb))
-		return h1_close(h1);
+	cmp_fn cmp = write_csv_header(fp, sort, sort_len);
+	if (cmp)
+		qsort(hslv, hslc, sizeof(*hslv), cmp);
+	write_csv_data(fp, hslv, hslc);
+	return fp;
+}
 
-	char *n = lb.ptr;
-	for (size_t i = 0; i < hslc; ++i) {
-		hslv[i].loc_name = n;
-		n += hslv[i].lname_len;
-		if (hslv[i].lname_len < 0)
-			return h1_close(h1);
+/* /$PID/each/$MIN endpoint */
+static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
+				size_t min, bool csv)
+{
+	static const char default_sort[] = "bytes";
+	const char *sort = csv ? NULL : default_sort;
+	size_t sort_len = csv ? 0 : (sizeof(default_sort) - 1);
+
+	if (h1r->qstr && h1r->qlen > 5 && !memcmp(h1r->qstr, "sort=", 5)) {
+		sort = h1r->qstr + 5;
+		sort_len = h1r->qlen - 5;
 	}
 
 	struct mw_fbuf bdy;
 	FILE *fp = wbuf_init(&bdy);
 	if (!fp) return h1_close(h1);
-
-	if (!csv) {
-		unsigned depth = (unsigned)CMM_LOAD_SHARED(bt_req_depth);
-		fprintf(fp, "<html><head><title>mwrap each &gt;%lu"
-			"</title></head><body><p>mwrap each &gt;%lu "
-			"(change `%lu' in URL to adjust filtering) - "
-			"MWRAP=bt:%u <a href=\"%lu.csv\">.csv</a>",
-			min, min, min, depth, min);
-		show_stats(fp);
-		/* need borders to distinguish multi-level traces */
-		if (depth)
-			FPUTS("<table\nborder=1><tr>", fp);
-		else /* save screen space if only tracing one line */
-			FPUTS("<table><tr>", fp);
+	if (csv) {
+		if (write_csv(fp, min, sort, sort_len))
+			return h1_200(h1, &bdy, TYPE_CSV);
+		return h1_close(h1);
 	}
 
-	int (*cmp)(const void *, const void *) = NULL;
-	if (csv) {
-		for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
-			const char *fn = fields[i].fname;
-			if (i)
-				fputc(',', fp);
-			fputs(fn, fp);
-			if (fields[i].flen == sort_len &&
-					!memcmp(fn, sort, sort_len))
-				cmp = fields[i].cmp;
-		}
-		fputc('\n', fp);
-	} else {
-		for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
-			const char *fn = fields[i].fname;
-			FPUTS("<th>", fp);
-			if (fields[i].flen == sort_len &&
-					!memcmp(fn, sort, sort_len)) {
-				cmp = fields[i].cmp;
-				fprintf(fp, "<b>%s</b>", fields[i].fname);
-			} else {
-				fprintf(fp, "<a\nhref=\"./%lu?sort=%s\">%s</a>",
-					min, fn, fn);
-			}
-			FPUTS("</th>", fp);
+	size_t hslc;
+	AUTO_CLOFREE struct mw_fbuf lb;
+	AUTO_FREE struct h1_src_loc *hslv = accumulate(&lb, min, &hslc);
+	if (!hslv)
+		return h1_close(h1);
+
+	unsigned depth = (unsigned)CMM_LOAD_SHARED(bt_req_depth);
+	fprintf(fp, "<html><head><title>mwrap each &gt;%lu"
+		"</title></head><body><p>mwrap each &gt;%lu "
+		"(change `%lu' in URL to adjust filtering) - "
+		"MWRAP=bt:%u <a href=\"%lu.csv\">.csv</a>",
+		min, min, min, depth, min);
+	show_stats(fp);
+	/* need borders to distinguish multi-level traces */
+	if (depth)
+		FPUTS("<table\nborder=1><tr>", fp);
+	else /* save screen space if only tracing one line */
+		FPUTS("<table><tr>", fp);
+	cmp_fn cmp = NULL;
+	for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+		const char *fn = fields[i].fname;
+		FPUTS("<th>", fp);
+		if (fields[i].flen == sort_len &&
+				!memcmp(fn, sort, sort_len)) {
+			cmp = fields[i].cmp;
+			fprintf(fp, "<b>%s</b>", fields[i].fname);
+		} else {
+			fprintf(fp, "<a\nhref=\"./%lu?sort=%s\">%s</a>",
+				min, fn, fn);
 		}
+		FPUTS("</th>", fp);
 	}
-	if (!csv)
-		FPUTS("</tr>", fp);
+	FPUTS("</tr>", fp);
 	if (cmp)
 		qsort(hslv, hslc, sizeof(*hslv), cmp);
-	else if (!csv)
+	else
 		FPUTS("<tr><td>sort= not understood</td></tr>", fp);
-	if (csv) {
-		for (size_t i = 0; i < hslc; i++) {
-			struct h1_src_loc *hsl = &hslv[i];
 
-			fprintf(fp, "%zu,%zu,%zu,%zu,%0.3f,%zu,",
-				hsl->bytes, hsl->allocations, hsl->frees,
-				hsl->live, hsl->mean_life, hsl->max_life);
-			write_q_csv(fp, hsl->loc_name, hsl->lname_len);
-			fputc('\n', fp);
-		}
-	} else {
-		for (size_t i = 0; i < hslc; i++) {
-			struct h1_src_loc *hsl = &hslv[i];
+	for (size_t i = 0; i < hslc; i++) {
+		struct h1_src_loc *hsl = &hslv[i];
 
-			fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
-				"<td>%zu</td><td>%0.3f</td><td>%zu</td>",
-				hsl->bytes, hsl->allocations, hsl->frees,
-				hsl->live, hsl->mean_life, hsl->max_life);
-			FPUTS("<td><a\nhref=\"../at/", fp);
+		fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
+			"<td>%zu</td><td>%0.3f</td><td>%zu</td>",
+			hsl->bytes, hsl->allocations, hsl->frees,
+			hsl->live, hsl->mean_life, hsl->max_life);
+		FPUTS("<td><a\nhref=\"../at/", fp);
 
-			write_b64_url(fp, src_loc_hash_tip(hsl->sl),
-					src_loc_hash_len(hsl->sl));
+		write_b64_url(fp, src_loc_hash_tip(hsl->sl),
+				src_loc_hash_len(hsl->sl));
 
-			FPUTS("\">", fp);
-			write_html(fp, hsl->loc_name, hsl->lname_len);
-			FPUTS("</a></td></tr>", fp);
-		}
-		FPUTS("</table></body></html>", fp);
+		FPUTS("\">", fp);
+		write_html(fp, hsl->loc_name, hsl->lname_len);
+		FPUTS("</a></td></tr>", fp);
 	}
-	return h1_200(h1, &bdy, csv ? TYPE_CSV : TYPE_HTML);
+	FPUTS("</table></body></html>", fp);
+	return h1_200(h1, &bdy, TYPE_HTML);
 }
 
 /* /$PID/ root endpoint */
@@ -781,7 +792,7 @@ static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r)
 		if ((c = PATH_SKIP(h1r, "/each/"))) {
 			errno = 0;
 			char *e;
-			unsigned long min = strtoul(c, &e, 10);
+			size_t min = (size_t)strtoul(c, &e, 10);
 			if (!errno) {
 				if (*e == ' ' || *e == '?')
 					return each_gt(h1, h1r, min, false);
diff --git a/mwrap_core.h b/mwrap_core.h
index deb3bb3..fff0538 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -732,6 +732,7 @@ enomem:
 struct dump_arg {
 	FILE *fp;
 	size_t min;
+	bool dump_csv;
 };
 
 char **bt_syms(void * const *addrlist, uint32_t size)
@@ -754,12 +755,16 @@ static void cleanup_free(void *any)
 	free(*p);
 }
 
+static void *write_csv(FILE *, size_t min, const char *sort, size_t sort_len);
 static void *dump_to_file(struct dump_arg *a)
 {
 	struct cds_lfht_iter iter;
 	struct src_loc *l;
 	struct cds_lfht *t;
 
+	if (a->dump_csv)
+		return write_csv(a->fp, a->min, NULL, 0);
+
 	++locating;
 	rcu_read_lock();
 	t = CMM_LOAD_SHARED(totals);
@@ -857,7 +862,7 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
 {
 	const char *opt = getenv("MWRAP");
 	const char *modes[] = { "a", "a+", "w", "w+", "r+" };
-	struct dump_arg a = { .min = 0 };
+	struct dump_arg a = { .min = 0, .dump_csv = false };
 	size_t i;
 	int dump_fd;
 	char *dump_path;
@@ -870,9 +875,24 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
 		return;
 
 	++locating;
-	if ((dump_path = strstr(opt, "dump_path:")) &&
-			(dump_path += sizeof("dump_path")) &&
-			*dump_path) {
+
+	/* parse dump_csv:$PATHNAME */
+	if ((dump_path = strstr(opt, "dump_csv:"))) {
+		dump_path += sizeof("dump_csv");
+		if (!*dump_path)
+			dump_path = NULL;
+		else
+			a.dump_csv = true;
+	}
+	if (!dump_path) {
+		/* parse dump_path:$PATHNAME */
+		if ((dump_path = strstr(opt, "dump_path:"))) {
+			dump_path += sizeof("dump_path");
+			if (!*dump_path)
+				dump_path = NULL;
+		}
+	}
+	if (dump_path) {
 		char *end = strchr(dump_path, ',');
 		char buf[PATH_MAX];
 		if (end) {
@@ -887,10 +907,13 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
 			fprintf(stderr, "open %s failed: %m\n", dump_path);
 			goto out;
 		}
-	}
-	else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
+	} else if ((s = strstr(opt, "dump_fd:")) &&
+			!sscanf(s, "dump_fd:%d", &dump_fd))
 		goto out;
 
+	/* allow dump_csv standalone for dump_fd */
+	if (!a.dump_csv && strstr(opt, "dump_csv"))
+		a.dump_csv = true;
 	if ((s = strstr(opt, "dump_min:")))
 		sscanf(s, "dump_min:%zu", &a.min);
 
diff --git a/script/mwrap-perl b/script/mwrap-perl
index 182b0bd..eb29176 100644
--- a/script/mwrap-perl
+++ b/script/mwrap-perl
@@ -76,12 +76,20 @@ Dumps the output at exit to a given filename:
 
 	total_bytes	call_count	location
 
-In the future, dumping to a self-describing CSV will be supported.
-
 =item dump_fd:$DESCRIPTOR
 
 As with dump_path, but dumps the output to a given file descriptor.
 
+=item dump_csv:$FILENAME
+
+Dumps the output at exit as CSV to the given filename.
+
+The format matches the CSV output of the HTTP server and includes
+column headers, but is subject to change in future releases.
+
+C<dump_csv> without the C<:> may also be used in conjunction with
+C<dump_fd>, such as C<MWRAP=dump_fd:2,dump_csv>.
+
 =back
 
 =head1 HTTP POST API
diff --git a/t/mwrap.t b/t/mwrap.t
index 6f99715..ccd739b 100644
--- a/t/mwrap.t
+++ b/t/mwrap.t
@@ -9,7 +9,8 @@ my $dump = "$mwrap_tmp/dump";
 {
 	my $env = { MWRAP => "dump_path:$dump,dump_min:10000" };
 	my $nr = 1000;
-	mwrap_run('dump test', $env, '-e', '$x = "hello world" x '.$nr);
+	my $script = '$x = "hello world" x '.$nr;
+	mwrap_run('dump test', $env, '-e', $script);
 	ok(-s $dump, "dump file written to");
 	my $s = slurp($dump);
 	truncate($dump, 0);
@@ -23,6 +24,24 @@ my $dump = "$mwrap_tmp/dump";
 	} else {
 		fail("$s failed to match $re");
 	}
+
+	$env->{MWRAP} = "dump_csv:$dump";
+	mwrap_run('dump_csv test', $env, '-e', $script);
+	ok(-s $dump, "CSV written to path");
+	$s = slurp($dump);
+	truncate($dump, 0);
+	my $nr_comma = ($s =~ tr/,/,/);
+	my $nr_cr = ($s =~ tr/\n/\n/);
+	ok($nr_comma > ($nr_cr * 4), 'CSV has more commas than CR');
+
+	$env->{MWRAP} = 'dump_csv,dump_fd:2';
+	mwrap_run('dump_csv,dump_fd test', $env, '-e', $script);
+	ok(-s $mwrap_err, "CSV written to stderr");
+	$s = slurp($mwrap_err);
+	truncate($mwrap_err, 0);
+	$nr_comma = ($s =~ tr/,/,/);
+	$nr_cr = ($s =~ tr/\n/\n/);
+	ok($nr_comma > ($nr_cr * 4), 'CSV has more commas than CR');
 }
 
 SKIP: { # C++ program which uses malloc via "new"
-- 
cgit v1.2.3-24-ge0c7