From: Eric Wong <e@80x24.org>
To: mwrap-perl@80x24.org
Subject: [PATCH 12/19] httpd: support CSV output
Date: Thu, 15 Dec 2022 20:52:48 +0000 [thread overview]
Message-ID: <20221215205255.27840-13-e@80x24.org> (raw)
In-Reply-To: <20221215205255.27840-1-e@80x24.org>
CSV is well-supported by SQLite (and many other tools) so it can
be useful for offline analysis.
---
mwrap_httpd.h | 171 ++++++++++++++++++++++++++++++++----------------
t/mwrap-httpd.t | 18 +++++
2 files changed, 134 insertions(+), 55 deletions(-)
diff --git a/mwrap_httpd.h b/mwrap_httpd.h
index 3d2bf99..f484bdd 100644
--- a/mwrap_httpd.h
+++ b/mwrap_httpd.h
@@ -30,6 +30,8 @@
#include <pthread.h>
#include <stdbool.h>
#define URL "https://80x24.org/mwrap-perl.git/about"
+#define TYPE_HTML "text/html; charset=UTF-8"
+#define TYPE_CSV "text/csv"
enum mw_qev {
MW_QEV_IGNORE = 0,
@@ -128,7 +130,7 @@ static int cmp_location(const void *x, const void *y)
return strcmp(a->loc_name, b->loc_name);
}
-/* fields for /each/$MIN/ endpoint */
+/* fields for /each/$MIN{,.csv} endpoints */
struct h1_tbl {
const char *fname;
size_t flen;
@@ -257,7 +259,7 @@ static enum mw_qev h1_res_oneshot(struct mw_h1 *h1, const char *buf, size_t len)
}
#define FPUTS(STR, fp) fwrite(STR, sizeof(STR) - 1, 1, fp)
-static enum mw_qev h1_200(struct mw_h1 *h1, struct mw_fbuf *fb)
+static enum mw_qev h1_200(struct mw_h1 *h1, struct mw_fbuf *fb, const char *ct)
{
/*
* the HTTP header goes at the END of the body buffer,
@@ -275,10 +277,8 @@ static enum mw_qev h1_200(struct mw_h1 *h1, struct mw_fbuf *fb)
"Expires: Fri, 01 Jan 1980 00:00:00 GMT\r\n"
"Pragma: no-cache\r\n"
"Cache-Control: no-cache, max-age=0, must-revalidate\r\n"
- "Content-Type: text/html; charset=UTF-8\r\n"
- "Content-Length: ", fb->fp);
- fprintf(fb->fp, "%zu", (size_t)clen);
- FPUTS("\r\n\r\n", fb->fp);
+ "Content-Type: ", fb->fp);
+ fprintf(fb->fp, "%s\r\nContent-Length: %zu\r\n\r\n", ct, (size_t)clen);
if (fbuf_close(fb))
return h1_close(h1);
@@ -354,6 +354,25 @@ static void write_html(FILE *fp, const char *s, size_t len)
}
}
+/*
+ * Writes @len bytes of @s to @fp as one quoted CSV field kept on a single line:
+ * newline => \n, `\' => \\ (so \n stays unambiguous), `"' => "" (RFC 4180).
+ */
+static void write_q_csv(FILE *fp, const char *s, size_t len)
+{
+ fputc('"', fp);
+ for (; len--; ++s) {
+ switch (*s) {
+ case '\n': fputs("\\n", fp); break;
+ case '\\': fputs("\\\\", fp); break;
+ case '"': fputs("\"\"", fp); break; /* CSV readers want "", not \" */
+ default: fputc(*s, fp);
+ }
+ }
+ fputc('"', fp);
+}
+
+
/* URI-safe base-64 (RFC 4648) */
static void write_b64_url(FILE *fp, const uint8_t *in, size_t len)
{
@@ -559,12 +578,12 @@ static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r)
}
rcu_read_unlock();
FPUTS("</table></body></html>", fp);
- return h1_200(h1, &html);
+ return h1_200(h1, &html, TYPE_HTML);
}
/* /$PID/each/$MIN endpoint */
static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
- unsigned long min)
+ unsigned long min, bool csv)
{
static const char default_sort[] = "bytes";
const char *sort = default_sort;
@@ -593,58 +612,86 @@ static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
return h1_close(h1);
}
- struct mw_fbuf html;
- FILE *fp = wbuf_init(&html);
+ struct mw_fbuf bdy;
+ FILE *fp = wbuf_init(&bdy);
if (!fp) return h1_close(h1);
- fprintf(fp, "<html><head><title>mwrap each >%lu"
- "</title></head><body><p>mwrap each >%lu "
- "(change `%lu' in URL to adjust filtering) - MWRAP=bt:%u",
- min, min, min, (unsigned)bt_req_depth);
- show_stats(fp);
- if (bt_req_depth) /* need borders to distinguish multi-level traces */
- FPUTS("<table\nborder=1><tr>", fp);
- else /* save screen space if only tracing one line */
- FPUTS("<table><tr>", fp);
+ if (!csv) {
+ fprintf(fp, "<html><head><title>mwrap each >%lu"
+ "</title></head><body><p>mwrap each >%lu "
+ "(change `%lu' in URL to adjust filtering) - "
+ "MWRAP=bt:%u", min, min, min, (unsigned)bt_req_depth);
+ show_stats(fp);
+ /* need borders to distinguish multi-level traces */
+ if (bt_req_depth)
+ FPUTS("<table\nborder=1><tr>", fp);
+ else /* save screen space if only tracing one line */
+ FPUTS("<table><tr>", fp);
+ }
int (*cmp)(const void *, const void *) = NULL;
- for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
- FPUTS("<th>", fp);
- if (fields[i].flen == sort_len &&
- !memcmp(fields[i].fname, sort, sort_len)) {
- cmp = fields[i].cmp;
- fprintf(fp, "<b>%s</b>", fields[i].fname);
- } else {
- fprintf(fp,
- "<a\nhref=\"./%lu?sort=%s\">%s</a>",
- min, fields[i].fname, fields[i].fname);
+ if (csv) {
+ for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+ const char *fn = fields[i].fname;
+ if (i)
+ fputc(',', fp);
+ fputs(fn, fp);
+ if (fields[i].flen == sort_len &&
+ !memcmp(fn, sort, sort_len))
+ cmp = fields[i].cmp;
+ }
+ fputc('\n', fp);
+ } else {
+ for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+ const char *fn = fields[i].fname;
+ FPUTS("<th>", fp);
+ if (fields[i].flen == sort_len &&
+ !memcmp(fn, sort, sort_len)) {
+ cmp = fields[i].cmp;
+ fprintf(fp, "<b>%s</b>", fields[i].fname);
+ } else {
+ fprintf(fp, "<a\nhref=\"./%lu?sort=%s\">%s</a>",
+ min, fn, fn);
+ }
+ FPUTS("</th>", fp);
}
- FPUTS("</th>", fp);
}
- FPUTS("</tr>", fp);
+ if (!csv)
+ FPUTS("</tr>", fp);
if (cmp)
qsort(hslv, hslc, sizeof(*hslv), cmp);
- else
+ else if (!csv)
FPUTS("<tr><td>sort= not understood</td></tr>", fp);
- for (size_t i = 0; i < hslc; i++) {
- struct h1_src_loc *hsl = &hslv[i];
-
- fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
- "<td>%zu</td><td>%0.3f</td><td>%zu</td>",
- hsl->bytes, hsl->allocations, hsl->frees,
- hsl->live, hsl->mean_life, hsl->max_life);
- FPUTS("<td><a\nhref=\"../at/", fp);
-
- /* yes, we're writing our memory addresses into the URI */
- write_b64_url(fp, (const void *)&hsl->sl->f,
- src_loc_hash_len(hsl->sl));
-
- FPUTS("\">", fp);
- write_html(fp, hsl->loc_name, hsl->lname_len);
- FPUTS("</a></td></tr>", fp);
+ if (csv) {
+ for (size_t i = 0; i < hslc; i++) {
+ struct h1_src_loc *hsl = &hslv[i];
+
+ fprintf(fp, "%zu,%zu,%zu,%zu,%0.3f,%zu,",
+ hsl->bytes, hsl->allocations, hsl->frees,
+ hsl->live, hsl->mean_life, hsl->max_life);
+ write_q_csv(fp, hsl->loc_name, hsl->lname_len);
+ fputc('\n', fp);
+ }
+ } else {
+ for (size_t i = 0; i < hslc; i++) {
+ struct h1_src_loc *hsl = &hslv[i];
+
+ fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
+ "<td>%zu</td><td>%0.3f</td><td>%zu</td>",
+ hsl->bytes, hsl->allocations, hsl->frees,
+ hsl->live, hsl->mean_life, hsl->max_life);
+ FPUTS("<td><a\nhref=\"../at/", fp);
+
+ write_b64_url(fp, (const void *)&hsl->sl->f,
+ src_loc_hash_len(hsl->sl));
+
+ FPUTS("\">", fp);
+ write_html(fp, hsl->loc_name, hsl->lname_len);
+ FPUTS("</a></td></tr>", fp);
+ }
+ FPUTS("</table></body></html>", fp);
}
- FPUTS("</table></body></html>", fp);
- return h1_200(h1, &html);
+ return h1_200(h1, &bdy, csv ? TYPE_CSV : TYPE_HTML);
}
/* /$PID/ root endpoint */
@@ -661,10 +708,19 @@ static enum mw_qev pid_root(struct mw_h1 *h1, struct mw_h1req *h1r)
FPUTS("<p><a\nhref=\"each/" default_min "\">allocations >"
default_min " bytes</a>"
"<p><a\nhref=\"" URL "\">" URL "</a></body></html>", fp);
- return h1_200(h1, &html);
+ return h1_200(h1, &html, TYPE_HTML);
#undef default_min
}
+/* does @e start with NUL-terminated @sfx?  @e is not NUL-terminated */
+static bool sfx_eq(const char *e, const char *sfx)
+{
+ for (const char *m = sfx; *m; m++, e++) /* length comes from @sfx only */
+ if (*e != *m)
+ return false; /* first mismatch: nothing past it in @e is read */
+ return true;
+}
+
static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r)
{
if (h1r->method_len == 3 && !memcmp(h1r->method, "GET", 3)) {
@@ -672,10 +728,15 @@ static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r)
if ((c = PATH_SKIP(h1r, "/each/"))) {
errno = 0;
- char *end;
- unsigned long min = strtoul(c, &end, 10);
- if ((*end == ' ' || *end == '?') && !errno)
- return each_gt(h1, h1r, min);
+ char *e;
+ unsigned long min = strtoul(c, &e, 10);
+ if (!errno) {
+ if (*e == ' ' || *e == '?')
+ return each_gt(h1, h1r, min, false);
+ if (sfx_eq(e, ".csv") &&
+ (e[4] == ' ' || e[4] == '?'))
+ return each_gt(h1, h1r, min, true);
+ }
} else if ((PATH_SKIP(h1r, "/at/"))) {
return each_at(h1, h1r);
} else if (h1r->path_len == 1 && h1r->path[0] == '/') {
diff --git a/t/mwrap-httpd.t b/t/mwrap-httpd.t
index f300eae..ca90cf0 100644
--- a/t/mwrap-httpd.t
+++ b/t/mwrap-httpd.t
@@ -134,6 +134,24 @@ SKIP: {
SKIP: {
skip 'no reset w/o curl --unix-socket', 1 if !$curl_unix;
+
+ $rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout,
+ "http://0/$pid/each/100.csv");
+ is($rc, 0, '.csv retrieved') or skip 'CSV failed', 1;
+ my $db = "$mwrap_tmp/t.sqlite3";
+ $rc = system(qw(sqlite3), $db, ".import --csv $cout mwrap_each");
+ if ($rc == -1) { # system() could not start sqlite3 at all
+ diag 'sqlite3 missing';
+ } else {
+ is($rc, 0, 'sqlite3 import');
+ my $n = `sqlite3 $db 'SELECT COUNT(*) FROM mwrap_each'`;
+ is($?, 0, 'sqlite3 count');
+ my $exp = split(/\n/, slurp($cout));
+ # sqlite3 turns the CSV header line into column names, hence +1
+ is($n + 1, $exp, 'imported all rows into sqlite');
+ # diag `sqlite3 $db .schema`;
+ }
+
$rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout,
'-d', 'x=y', "http://0/$pid/reset");
is($rc, 0, 'curl /reset');
next prev parent reply other threads:[~2022-12-15 20:52 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-12-15 20:52 [PATCH 00/19] another round of httpd improvements Eric Wong
2022-12-15 20:52 ` [PATCH 01/19] mwrap_httpd: show current bytes consistently Eric Wong
2022-12-15 20:52 ` [PATCH 02/19] introduce AUTO_FREE macro to simplify cleanup Eric Wong
2022-12-15 20:52 ` [PATCH 03/19] httpd: rework httpd to use auto-free for memstream Eric Wong
2022-12-15 20:52 ` [PATCH 04/19] httpd: avoid newline if not using bt: >= 1 Eric Wong
2022-12-15 20:52 ` [PATCH 05/19] mwrap_httpd: flesh out /$PID/ and /$PID/trim endpoints Eric Wong
2022-12-15 20:52 ` [PATCH 06/19] mwrap_httpd: add info about src_file and src_loc stats Eric Wong
2022-12-15 20:52 ` [PATCH 07/19] use uatomic_inc where appropriate Eric Wong
2022-12-15 20:52 ` [PATCH 08/19] httpd: drop unnecessary AND ops from base-64 Eric Wong
2022-12-15 20:52 ` [PATCH 09/19] mymalloc: add notes on the malloc implementation Eric Wong
2022-12-15 20:52 ` [PATCH 10/19] rproxy: link to mwrap_httpd /$PID/ root without each, too Eric Wong
2022-12-15 20:52 ` [PATCH 11/19] httpd: shrink `mean_life' field to `double' Eric Wong
2022-12-15 20:52 ` Eric Wong [this message]
2022-12-15 20:52 ` [PATCH 13/19] rproxy: enable deflater by default Eric Wong
2022-12-15 20:52 ` [PATCH 14/19] mwrap_httpd: do not abort on fork if out-of-resources Eric Wong
2022-12-15 20:52 ` [PATCH 15/19] httpd: pause forking thread on resource limitations Eric Wong
2022-12-15 20:52 ` [PATCH 16/19] rename mwrap_httpd.h to httpd.h Eric Wong
2022-12-15 20:52 ` [PATCH 17/19] httpd: describe simple and naive buffering scheme Eric Wong
2022-12-15 20:52 ` [PATCH 18/19] httpd: drop TODO item for pipelining Eric Wong
2022-12-15 20:52 ` [PATCH 19/19] avoid -Warray-bounds warning, avoid stack overallocation Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221215205255.27840-13-e@80x24.org \
--to=e@80x24.org \
--cc=mwrap-perl@80x24.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mwrap-perl.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).