mwrap (Perl version) user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: mwrap-perl@80x24.org
Subject: [PATCH 12/19] httpd: support CSV output
Date: Thu, 15 Dec 2022 20:52:48 +0000	[thread overview]
Message-ID: <20221215205255.27840-13-e@80x24.org> (raw)
In-Reply-To: <20221215205255.27840-1-e@80x24.org>

CSV is well-supported by SQLite (and many other tools) so it can
be useful for offline analysis.
---
 mwrap_httpd.h   | 171 ++++++++++++++++++++++++++++++++----------------
 t/mwrap-httpd.t |  18 +++++
 2 files changed, 134 insertions(+), 55 deletions(-)

diff --git a/mwrap_httpd.h b/mwrap_httpd.h
index 3d2bf99..f484bdd 100644
--- a/mwrap_httpd.h
+++ b/mwrap_httpd.h
@@ -30,6 +30,8 @@
 #include <pthread.h>
 #include <stdbool.h>
 #define URL "https://80x24.org/mwrap-perl.git/about"
+#define TYPE_HTML "text/html; charset=UTF-8"
+#define TYPE_CSV "text/csv"
 
 enum mw_qev {
 	MW_QEV_IGNORE = 0,
@@ -128,7 +130,7 @@ static int cmp_location(const void *x, const void *y)
 	return strcmp(a->loc_name, b->loc_name);
 }
 
-/* fields for /each/$MIN/ endpoint */
+/* fields for /each/$MIN{,.csv} endpoints */
 struct h1_tbl {
 	const char *fname;
 	size_t flen;
@@ -257,7 +259,7 @@ static enum mw_qev h1_res_oneshot(struct mw_h1 *h1, const char *buf, size_t len)
 }
 
 #define FPUTS(STR, fp) fwrite(STR, sizeof(STR) - 1, 1, fp)
-static enum mw_qev h1_200(struct mw_h1 *h1, struct mw_fbuf *fb)
+static enum mw_qev h1_200(struct mw_h1 *h1, struct mw_fbuf *fb, const char *ct)
 {
 	/*
 	 * the HTTP header goes at the END of the body buffer,
@@ -275,10 +277,8 @@ static enum mw_qev h1_200(struct mw_h1 *h1, struct mw_fbuf *fb)
 		"Expires: Fri, 01 Jan 1980 00:00:00 GMT\r\n"
 		"Pragma: no-cache\r\n"
 		"Cache-Control: no-cache, max-age=0, must-revalidate\r\n"
-		"Content-Type: text/html; charset=UTF-8\r\n"
-		"Content-Length: ", fb->fp);
-	fprintf(fb->fp, "%zu", (size_t)clen);
-	FPUTS("\r\n\r\n", fb->fp);
+		"Content-Type: ", fb->fp);
+	fprintf(fb->fp, "%s\r\nContent-Length: %zu\r\n\r\n", ct, (size_t)clen);
 
 	if (fbuf_close(fb))
 		return h1_close(h1);
@@ -354,6 +354,25 @@ static void write_html(FILE *fp, const char *s, size_t len)
 	}
 }
 
+/*
+ * quotes multi-line backtraces for CSV (and `\' and `"' in case
+ * we encounter nasty file names).
+ */
+static void write_q_csv(FILE *fp, const char *s, size_t len)
+{
+	fputc('"', fp);
+	for (; len--; ++s) {
+		switch (*s) {
+		case '\n': fputs("\\n", fp); break;
+		case '\\': fputs("\\\\", fp); break;
+		case '"': fputs("\\\"", fp); break;
+		default: fputc(*s, fp);
+		}
+	}
+	fputc('"', fp);
+}
+
+
 /* URI-safe base-64 (RFC 4648) */
 static void write_b64_url(FILE *fp, const uint8_t *in, size_t len)
 {
@@ -559,12 +578,12 @@ static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r)
 	}
 	rcu_read_unlock();
 	FPUTS("</table></body></html>", fp);
-	return h1_200(h1, &html);
+	return h1_200(h1, &html, TYPE_HTML);
 }
 
 /* /$PID/each/$MIN endpoint */
 static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
-				unsigned long min)
+				unsigned long min, bool csv)
 {
 	static const char default_sort[] = "bytes";
 	const char *sort = default_sort;
@@ -593,58 +612,86 @@ static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
 			return h1_close(h1);
 	}
 
-	struct mw_fbuf html;
-	FILE *fp = wbuf_init(&html);
+	struct mw_fbuf bdy;
+	FILE *fp = wbuf_init(&bdy);
 	if (!fp) return h1_close(h1);
-	fprintf(fp, "<html><head><title>mwrap each &gt;%lu"
-		"</title></head><body><p>mwrap each &gt;%lu "
-		"(change `%lu' in URL to adjust filtering) - MWRAP=bt:%u",
-		min, min, min, (unsigned)bt_req_depth);
 
-	show_stats(fp);
-	if (bt_req_depth) /* need borders to distinguish multi-level traces */
-		FPUTS("<table\nborder=1><tr>", fp);
-	else /* save screen space if only tracing one line */
-		FPUTS("<table><tr>", fp);
+	if (!csv) {
+		fprintf(fp, "<html><head><title>mwrap each &gt;%lu"
+			"</title></head><body><p>mwrap each &gt;%lu "
+			"(change `%lu' in URL to adjust filtering) - "
+			"MWRAP=bt:%u", min, min, min, (unsigned)bt_req_depth);
+		show_stats(fp);
+		/* need borders to distinguish multi-level traces */
+		if (bt_req_depth)
+			FPUTS("<table\nborder=1><tr>", fp);
+		else /* save screen space if only tracing one line */
+			FPUTS("<table><tr>", fp);
+	}
 
 	int (*cmp)(const void *, const void *) = NULL;
-	for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
-		FPUTS("<th>", fp);
-		if (fields[i].flen == sort_len &&
-				!memcmp(fields[i].fname, sort, sort_len)) {
-			cmp = fields[i].cmp;
-			fprintf(fp, "<b>%s</b>", fields[i].fname);
-		} else {
-			fprintf(fp,
-				"<a\nhref=\"./%lu?sort=%s\">%s</a>",
-				min, fields[i].fname, fields[i].fname);
+	if (csv) {
+		for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+			const char *fn = fields[i].fname;
+			if (i)
+				fputc(',', fp);
+			fputs(fn, fp);
+			if (fields[i].flen == sort_len &&
+					!memcmp(fn, sort, sort_len))
+				cmp = fields[i].cmp;
+		}
+		fputc('\n', fp);
+	} else {
+		for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+			const char *fn = fields[i].fname;
+			FPUTS("<th>", fp);
+			if (fields[i].flen == sort_len &&
+					!memcmp(fn, sort, sort_len)) {
+				cmp = fields[i].cmp;
+				fprintf(fp, "<b>%s</b>", fields[i].fname);
+			} else {
+				fprintf(fp, "<a\nhref=\"./%lu?sort=%s\">%s</a>",
+					min, fn, fn);
+			}
+			FPUTS("</th>", fp);
 		}
-		FPUTS("</th>", fp);
 	}
-	FPUTS("</tr>", fp);
+	if (!csv)
+		FPUTS("</tr>", fp);
 	if (cmp)
 		qsort(hslv, hslc, sizeof(*hslv), cmp);
-	else
+	else if (!csv)
 		FPUTS("<tr><td>sort= not understood</td></tr>", fp);
-	for (size_t i = 0; i < hslc; i++) {
-		struct h1_src_loc *hsl = &hslv[i];
-
-		fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
-			"<td>%zu</td><td>%0.3f</td><td>%zu</td>",
-			hsl->bytes, hsl->allocations, hsl->frees,
-			hsl->live, hsl->mean_life, hsl->max_life);
-		FPUTS("<td><a\nhref=\"../at/", fp);
-
-		/* yes, we're writing our memory addresses into the URI */
-		write_b64_url(fp, (const void *)&hsl->sl->f,
-				src_loc_hash_len(hsl->sl));
-
-		FPUTS("\">", fp);
-		write_html(fp, hsl->loc_name, hsl->lname_len);
-		FPUTS("</a></td></tr>", fp);
+	if (csv) {
+		for (size_t i = 0; i < hslc; i++) {
+			struct h1_src_loc *hsl = &hslv[i];
+
+			fprintf(fp, "%zu,%zu,%zu,%zu,%0.3f,%zu,",
+				hsl->bytes, hsl->allocations, hsl->frees,
+				hsl->live, hsl->mean_life, hsl->max_life);
+			write_q_csv(fp, hsl->loc_name, hsl->lname_len);
+			fputc('\n', fp);
+		}
+	} else {
+		for (size_t i = 0; i < hslc; i++) {
+			struct h1_src_loc *hsl = &hslv[i];
+
+			fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
+				"<td>%zu</td><td>%0.3f</td><td>%zu</td>",
+				hsl->bytes, hsl->allocations, hsl->frees,
+				hsl->live, hsl->mean_life, hsl->max_life);
+			FPUTS("<td><a\nhref=\"../at/", fp);
+
+			write_b64_url(fp, (const void *)&hsl->sl->f,
+					src_loc_hash_len(hsl->sl));
+
+			FPUTS("\">", fp);
+			write_html(fp, hsl->loc_name, hsl->lname_len);
+			FPUTS("</a></td></tr>", fp);
+		}
+		FPUTS("</table></body></html>", fp);
 	}
-	FPUTS("</table></body></html>", fp);
-	return h1_200(h1, &html);
+	return h1_200(h1, &bdy, csv ? TYPE_CSV : TYPE_HTML);
 }
 
 /* /$PID/ root endpoint */
@@ -661,10 +708,19 @@ static enum mw_qev pid_root(struct mw_h1 *h1, struct mw_h1req *h1r)
 	FPUTS("<p><a\nhref=\"each/" default_min "\">allocations &gt;"
 		default_min " bytes</a>"
 		"<p><a\nhref=\"" URL "\">" URL "</a></body></html>", fp);
-	return h1_200(h1, &html);
+	return h1_200(h1, &html, TYPE_HTML);
 #undef default_min
 }
 
+/* @e is not NUL-terminated */
+static bool sfx_eq(const char *e, const char *sfx)
+{
+	for (const char *m = sfx; *m; m++, e++)
+		if (*e != *m)
+			return false;
+	return true;
+}
+
 static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r)
 {
 	if (h1r->method_len == 3 && !memcmp(h1r->method, "GET", 3)) {
@@ -672,10 +728,15 @@ static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r)
 
 		if ((c = PATH_SKIP(h1r, "/each/"))) {
 			errno = 0;
-			char *end;
-			unsigned long min = strtoul(c, &end, 10);
-			if ((*end == ' ' || *end == '?') && !errno)
-				return each_gt(h1, h1r, min);
+			char *e;
+			unsigned long min = strtoul(c, &e, 10);
+			if (!errno) {
+				if (*e == ' ' || *e == '?')
+					return each_gt(h1, h1r, min, false);
+				if (sfx_eq(e, ".csv") &&
+						(e[4] == ' ' || e[4] == '?'))
+					return each_gt(h1, h1r, min, true);
+			}
 		} else if ((PATH_SKIP(h1r, "/at/"))) {
 			return each_at(h1, h1r);
 		} else if (h1r->path_len == 1 && h1r->path[0] == '/') {
diff --git a/t/mwrap-httpd.t b/t/mwrap-httpd.t
index f300eae..ca90cf0 100644
--- a/t/mwrap-httpd.t
+++ b/t/mwrap-httpd.t
@@ -134,6 +134,24 @@ SKIP: {
 
 SKIP: {
 	skip 'no reset w/o curl --unix-socket', 1 if !$curl_unix;
+
+	$rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout,
+		"http://0/$pid/each/100.csv");
+	is($rc, 0, '.csv retrieved') or skip 'CSV failed', 1;
+	my $db = "$mwrap_tmp/t.sqlite3";
+	$rc = system(qw(sqlite3), $db, ".import --csv $cout mwrap_each");
+	if ($rc == -1) {
+		diag 'sqlite3 missing';
+	} else {
+		is($rc, 0, 'sqlite3 import');
+		my $n = `sqlite3 $db 'SELECT COUNT(*) FROM mwrap_each'`;
+		is($?, 0, 'sqlite3 count');
+		my $exp = split(/\n/, slurp($cout));
+		is($n + 1, $exp, 'imported all rows into sqlite');
+		my $n = `sqlite3 $db 'SELECT COUNT(*) FROM mwrap_each'`;
+		# diag `sqlite3 $db .schema`;
+	}
+
 	$rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout,
 		'-d', 'x=y', "http://0/$pid/reset");
 	is($rc, 0, 'curl /reset');

  parent reply	other threads:[~2022-12-15 20:52 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-15 20:52 [PATCH 00/19] another round of httpd improvements Eric Wong
2022-12-15 20:52 ` [PATCH 01/19] mwrap_httpd: show current bytes consistently Eric Wong
2022-12-15 20:52 ` [PATCH 02/19] introduce AUTO_FREE macro to simplify cleanup Eric Wong
2022-12-15 20:52 ` [PATCH 03/19] httpd: rework httpd to use auto-free for memstream Eric Wong
2022-12-15 20:52 ` [PATCH 04/19] httpd: avoid newline if not using bt: >= 1 Eric Wong
2022-12-15 20:52 ` [PATCH 05/19] mwrap_httpd: flesh out /$PID/ and /$PID/trim endpoints Eric Wong
2022-12-15 20:52 ` [PATCH 06/19] mwrap_httpd: add info about src_file and src_loc stats Eric Wong
2022-12-15 20:52 ` [PATCH 07/19] use uatomic_inc where appropriate Eric Wong
2022-12-15 20:52 ` [PATCH 08/19] httpd: drop unnecessary AND ops from base-64 Eric Wong
2022-12-15 20:52 ` [PATCH 09/19] mymalloc: add notes on the malloc implementation Eric Wong
2022-12-15 20:52 ` [PATCH 10/19] rproxy: link to mwrap_httpd /$PID/ root without each, too Eric Wong
2022-12-15 20:52 ` [PATCH 11/19] httpd: shrink `mean_life' field to `double' Eric Wong
2022-12-15 20:52 ` Eric Wong [this message]
2022-12-15 20:52 ` [PATCH 13/19] rproxy: enable deflater by default Eric Wong
2022-12-15 20:52 ` [PATCH 14/19] mwrap_httpd: do not abort on fork if out-of-resources Eric Wong
2022-12-15 20:52 ` [PATCH 15/19] httpd: pause forking thread on resource limitations Eric Wong
2022-12-15 20:52 ` [PATCH 16/19] rename mwrap_httpd.h to httpd.h Eric Wong
2022-12-15 20:52 ` [PATCH 17/19] httpd: describe simple and naive buffering scheme Eric Wong
2022-12-15 20:52 ` [PATCH 18/19] httpd: drop TODO item for pipelining Eric Wong
2022-12-15 20:52 ` [PATCH 19/19] avoid -Warray-bounds warning, avoid stack overallocation Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221215205255.27840-13-e@80x24.org \
    --to=e@80x24.org \
    --cc=mwrap-perl@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mwrap-perl.git

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).