support MWRAP=dump_csv:$FILENAME parameter

This reuses the CSV-generation code from the HTTP server (httpd.h).
author: Eric Wong <e@80x24.org> 2023-01-11 01:12:46 +0000
committer: Eric Wong <mwrap-perl@80x24.org> 2023-01-11 04:23:30 +0000
commit: 8ce0068f470f3dad3a2920e7fdeedeee235c44eb (patch)
tree: 1f476d221daf66d999f35fdc96904ca16e99c7e5
parent: 718b313cf3fee3799cdea3ecbbfba8a615066baf (diff)
download: mwrap-8ce0068f470f3dad3a2920e7fdeedeee235c44eb.tar.gz
4 files changed, 173 insertions, 112 deletions
diff --git a/httpd.h b/httpd.h
index ef4d83c..9219d36 100644
--- a/httpd.h
+++ b/httpd.h
@@ -504,9 +504,11 @@ static off_t write_loc_name(FILE *fp, const struct src_loc *l)
          return end - beg;
  }
  
-static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc, FILE *lp)
+static struct h1_src_loc *
+accumulate(struct mw_fbuf *lb, unsigned long min, size_t *hslc)
  {
          struct mw_fbuf fb;
+        if (!fbuf_init(lb)) return NULL;
          if (!fbuf_init(&fb)) return NULL;
          rcu_read_lock();
          struct cds_lfht *t = CMM_LOAD_SHARED(totals);
@@ -528,18 +530,23 @@ static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc, FILE *lp)
                          HUGE_VAL;
                  hsl.max_life = uatomic_read(&l->max_lifespan);
                  hsl.sl = l;
-                hsl.lname_len = write_loc_name(lp, l);
+                hsl.lname_len = write_loc_name(lb->fp, l);
                  fwrite(&hsl, sizeof(hsl), 1, fb.fp);
          }
          rcu_read_unlock();
  
-        struct h1_src_loc *hslv;
-        if (fbuf_close(&fb)) {
-                hslv = NULL;
-        } else {
-                *hslc = fb.len / sizeof(*hslv);
-                mwrap_assert((fb.len % sizeof(*hslv)) == 0);
-                hslv = (struct h1_src_loc *)fb.ptr;
+        if (fbuf_close(&fb) || fbuf_close(lb))
+                return NULL;
+
+        struct h1_src_loc *hslv = (struct h1_src_loc *)fb.ptr;
+        *hslc = fb.len / sizeof(*hslv);
+        mwrap_assert((fb.len % sizeof(*hslv)) == 0);
+        char *n = lb->ptr;
+        for (size_t i = 0; i < *hslc; ++i) {
+                hslv[i].loc_name = n;
+                n += hslv[i].lname_len;
+                if (hslv[i].lname_len < 0)
+                        return NULL;
          }
          return hslv;
  }
@@ -609,124 +616,128 @@ static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r)
          return h1_200(h1, &html, TYPE_HTML);
  }
  
-/* /$PID/each/$MIN endpoint */
-static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
-                                unsigned long min, bool csv)
-{
-        static const char default_sort[] = "bytes";
-        const char *sort;
-        size_t sort_len = 0;
+typedef int (*cmp_fn)(const void *, const void *);
  
-        if (!csv) {
-                sort = default_sort;
-                sort_len = sizeof(default_sort) - 1;
+static cmp_fn write_csv_header(FILE *fp, const char *sort, size_t sort_len)
+{
+        cmp_fn cmp = NULL;
+        for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+                const char *fn = fields[i].fname;
+                if (i)
+                        fputc(',', fp);
+                fputs(fn, fp);
+                if (fields[i].flen == sort_len && !memcmp(fn, sort, sort_len))
+                        cmp = fields[i].cmp;
          }
+        fputc('\n', fp);
+        return cmp;
+}
  
-        if (h1r->qstr && h1r->qlen > 5 && !memcmp(h1r->qstr, "sort=", 5)) {
-                sort = h1r->qstr + 5;
-                sort_len = h1r->qlen - 5;
+static void write_csv_data(FILE *fp, struct h1_src_loc *hslv, size_t hslc)
+{
+        for (size_t i = 0; i < hslc; i++) {
+                struct h1_src_loc *hsl = &hslv[i];
+
+                fprintf(fp, "%zu,%zu,%zu,%zu,%0.3f,%zu,",
+                        hsl->bytes, hsl->allocations, hsl->frees,
+                        hsl->live, hsl->mean_life, hsl->max_life);
+                write_q_csv(fp, hsl->loc_name, hsl->lname_len);
+                fputc('\n', fp);
          }
+}
  
-        size_t hslc;
+static void *write_csv(FILE *fp, size_t min, const char *sort, size_t sort_len)
+{
          AUTO_CLOFREE struct mw_fbuf lb;
-        if (!fbuf_init(&lb)) return h1_close(h1);
-        AUTO_FREE struct h1_src_loc *hslv = accumulate(min, &hslc, lb.fp);
-        if (!hslv)
-                return h1_close(h1);
+        size_t hslc;
+        AUTO_FREE struct h1_src_loc *hslv = accumulate(&lb, min, &hslc);
+        if (!hslv) return NULL;
  
-        if (fbuf_close(&lb))
-                return h1_close(h1);
+        cmp_fn cmp = write_csv_header(fp, sort, sort_len);
+        if (cmp)
+                qsort(hslv, hslc, sizeof(*hslv), cmp);
+        write_csv_data(fp, hslv, hslc);
+        return fp;
+}
  
-        char *n = lb.ptr;
-        for (size_t i = 0; i < hslc; ++i) {
-                hslv[i].loc_name = n;
-                n += hslv[i].lname_len;
-                if (hslv[i].lname_len < 0)
-                        return h1_close(h1);
+/* /$PID/each/$MIN endpoint */
+static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
+                                size_t min, bool csv)
+{
+        static const char default_sort[] = "bytes";
+        const char *sort = csv ? NULL : default_sort;
+        size_t sort_len = csv ? 0 : (sizeof(default_sort) - 1);
+
+        if (h1r->qstr && h1r->qlen > 5 && !memcmp(h1r->qstr, "sort=", 5)) {
+                sort = h1r->qstr + 5;
+                sort_len = h1r->qlen - 5;
          }
  
          struct mw_fbuf bdy;
          FILE *fp = wbuf_init(&bdy);
          if (!fp) return h1_close(h1);
-
-        if (!csv) {
-                unsigned depth = (unsigned)CMM_LOAD_SHARED(bt_req_depth);
-                fprintf(fp, "<html><head><title>mwrap each &gt;%lu"
-                        "</title></head><body><p>mwrap each &gt;%lu "
-                        "(change `%lu' in URL to adjust filtering) - "
-                        "MWRAP=bt:%u <a href=\"%lu.csv\">.csv</a>",
-                        min, min, min, depth, min);
-                show_stats(fp);
-                /* need borders to distinguish multi-level traces */
-                if (depth)
-                        FPUTS("<table\nborder=1><tr>", fp);
-                else /* save screen space if only tracing one line */
-                        FPUTS("<table><tr>", fp);
+        if (csv) {
+                if (write_csv(fp, min, sort, sort_len))
+                        return h1_200(h1, &bdy, TYPE_CSV);
+                return h1_close(h1);
          }
  
-        int (*cmp)(const void *, const void *) = NULL;
-        if (csv) {
-                for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
-                        const char *fn = fields[i].fname;
-                        if (i)
-                                fputc(',', fp);
-                        fputs(fn, fp);
-                        if (fields[i].flen == sort_len &&
-                                        !memcmp(fn, sort, sort_len))
-                                cmp = fields[i].cmp;
-                }
-                fputc('\n', fp);
-        } else {
-                for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
-                        const char *fn = fields[i].fname;
-                        FPUTS("<th>", fp);
-                        if (fields[i].flen == sort_len &&
-                                        !memcmp(fn, sort, sort_len)) {
-                                cmp = fields[i].cmp;
-                                fprintf(fp, "<b>%s</b>", fields[i].fname);
-                        } else {
-                                fprintf(fp, "<a\nhref=\"./%lu?sort=%s\">%s</a>",
-                                        min, fn, fn);
-                        }
-                        FPUTS("</th>", fp);
+        size_t hslc;
+        AUTO_CLOFREE struct mw_fbuf lb;
+        AUTO_FREE struct h1_src_loc *hslv = accumulate(&lb, min, &hslc);
+        if (!hslv)
+                return h1_close(h1);
+
+        unsigned depth = (unsigned)CMM_LOAD_SHARED(bt_req_depth);
+        fprintf(fp, "<html><head><title>mwrap each &gt;%lu"
+                "</title></head><body><p>mwrap each &gt;%lu "
+                "(change `%lu' in URL to adjust filtering) - "
+                "MWRAP=bt:%u <a href=\"%lu.csv\">.csv</a>",
+                min, min, min, depth, min);
+        show_stats(fp);
+        /* need borders to distinguish multi-level traces */
+        if (depth)
+                FPUTS("<table\nborder=1><tr>", fp);
+        else /* save screen space if only tracing one line */
+                FPUTS("<table><tr>", fp);
+        cmp_fn cmp = NULL;
+        for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+                const char *fn = fields[i].fname;
+                FPUTS("<th>", fp);
+                if (fields[i].flen == sort_len &&
+                                !memcmp(fn, sort, sort_len)) {
+                        cmp = fields[i].cmp;
+                        fprintf(fp, "<b>%s</b>", fields[i].fname);
+                } else {
+                        fprintf(fp, "<a\nhref=\"./%lu?sort=%s\">%s</a>",
+                                min, fn, fn);
                  }
+                FPUTS("</th>", fp);
          }
-        if (!csv)
-                FPUTS("</tr>", fp);
+        FPUTS("</tr>", fp);
          if (cmp)
                  qsort(hslv, hslc, sizeof(*hslv), cmp);
-        else if (!csv)
+        else
                  FPUTS("<tr><td>sort= not understood</td></tr>", fp);
-        if (csv) {
-                for (size_t i = 0; i < hslc; i++) {
-                        struct h1_src_loc *hsl = &hslv[i];
  
-                        fprintf(fp, "%zu,%zu,%zu,%zu,%0.3f,%zu,",
-                                hsl->bytes, hsl->allocations, hsl->frees,
-                                hsl->live, hsl->mean_life, hsl->max_life);
-                        write_q_csv(fp, hsl->loc_name, hsl->lname_len);
-                        fputc('\n', fp);
-                }
-        } else {
-                for (size_t i = 0; i < hslc; i++) {
-                        struct h1_src_loc *hsl = &hslv[i];
+        for (size_t i = 0; i < hslc; i++) {
+                struct h1_src_loc *hsl = &hslv[i];
  
-                        fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
-                                "<td>%zu</td><td>%0.3f</td><td>%zu</td>",
-                                hsl->bytes, hsl->allocations, hsl->frees,
-                                hsl->live, hsl->mean_life, hsl->max_life);
-                        FPUTS("<td><a\nhref=\"../at/", fp);
+                fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
+                        "<td>%zu</td><td>%0.3f</td><td>%zu</td>",
+                        hsl->bytes, hsl->allocations, hsl->frees,
+                        hsl->live, hsl->mean_life, hsl->max_life);
+                FPUTS("<td><a\nhref=\"../at/", fp);
  
-                        write_b64_url(fp, src_loc_hash_tip(hsl->sl),
-                                        src_loc_hash_len(hsl->sl));
+                write_b64_url(fp, src_loc_hash_tip(hsl->sl),
+                                src_loc_hash_len(hsl->sl));
  
-                        FPUTS("\">", fp);
-                        write_html(fp, hsl->loc_name, hsl->lname_len);
-                        FPUTS("</a></td></tr>", fp);
-                }
-                FPUTS("</table></body></html>", fp);
+                FPUTS("\">", fp);
+                write_html(fp, hsl->loc_name, hsl->lname_len);
+                FPUTS("</a></td></tr>", fp);
          }
-        return h1_200(h1, &bdy, csv ? TYPE_CSV : TYPE_HTML);
+        FPUTS("</table></body></html>", fp);
+        return h1_200(h1, &bdy, TYPE_HTML);
  }
  
  /* /$PID/ root endpoint */
@@ -781,7 +792,7 @@ static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r)
                  if ((c = PATH_SKIP(h1r, "/each/"))) {
                          errno = 0;
                          char *e;
-                        unsigned long min = strtoul(c, &e, 10);
+                        size_t min = (size_t)strtoul(c, &e, 10);
                          if (!errno) {
                                  if (*e == ' ' || *e == '?')
                                          return each_gt(h1, h1r, min, false);
diff --git a/mwrap_core.h b/mwrap_core.h
index deb3bb3..fff0538 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -732,6 +732,7 @@ enomem:
  struct dump_arg {
          FILE *fp;
          size_t min;
+        bool dump_csv;
  };
  
  char **bt_syms(void * const *addrlist, uint32_t size)
@@ -754,12 +755,16 @@ static void cleanup_free(void *any)
          free(*p);
  }
  
+static void *write_csv(FILE *, size_t min, const char *sort, size_t sort_len);
  static void *dump_to_file(struct dump_arg *a)
  {
          struct cds_lfht_iter iter;
          struct src_loc *l;
          struct cds_lfht *t;
  
+        if (a->dump_csv)
+                return write_csv(a->fp, a->min, NULL, 0);
+
          ++locating;
          rcu_read_lock();
          t = CMM_LOAD_SHARED(totals);
@@ -857,7 +862,7 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
  {
          const char *opt = getenv("MWRAP");
          const char *modes[] = { "a", "a+", "w", "w+", "r+" };
-        struct dump_arg a = { .min = 0 };
+        struct dump_arg a = { .min = 0, .dump_csv = false };
          size_t i;
          int dump_fd;
          char *dump_path;
@@ -870,9 +875,24 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
                  return;
  
          ++locating;
-        if ((dump_path = strstr(opt, "dump_path:")) &&
-                        (dump_path += sizeof("dump_path")) &&
-                        *dump_path) {
+
+        /* parse dump_csv:$PATHNAME */
+        if ((dump_path = strstr(opt, "dump_csv:"))) {
+                dump_path += sizeof("dump_csv");
+                if (!*dump_path)
+                        dump_path = NULL;
+                else
+                        a.dump_csv = true;
+        }
+        if (!dump_path) {
+                /* parse dump_path:$PATHNAME */
+                if ((dump_path = strstr(opt, "dump_path:"))) {
+                        dump_path += sizeof("dump_path");
+                        if (!*dump_path)
+                                dump_path = NULL;
+                }
+        }
+        if (dump_path) {
                  char *end = strchr(dump_path, ',');
                  char buf[PATH_MAX];
                  if (end) {
@@ -887,10 +907,13 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
                          fprintf(stderr, "open %s failed: %m\n", dump_path);
                          goto out;
                  }
-        }
-        else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
+        } else if ((s = strstr(opt, "dump_fd:")) &&
+                        !sscanf(s, "dump_fd:%d", &dump_fd))
                  goto out;
  
+        /* a bare `dump_csv' (no :$PATHNAME) selects CSV, e.g. with dump_fd */
+        if (!a.dump_csv && strstr(opt, "dump_csv"))
+                a.dump_csv = true;
          if ((s = strstr(opt, "dump_min:")))
                  sscanf(s, "dump_min:%zu", &a.min);
  
diff --git a/script/mwrap-perl b/script/mwrap-perl
index 182b0bd..eb29176 100644
--- a/script/mwrap-perl
+++ b/script/mwrap-perl
@@ -76,12 +76,20 @@ Dumps the output at exit to a given filename:
  
          total_bytes        call_count        location
  
-In the future, dumping to a self-describing CSV will be supported.
-
  =item dump_fd:$DESCRIPTOR
  
  As with dump_path, but dumps the output to a given file descriptor.
  
+=item dump_csv:$FILENAME
+
+Dumps CSV output at exit to the given filename.
+
+This output matches the HTTP server output and includes column headers,
+but is subject to change in future releases.
+
+C<dump_csv> without the C<:> may also be used in conjunction with
+C<dump_fd>, such as C<MWRAP=dump_fd:2,dump_csv>.
+
  =back
  
  =head1 HTTP POST API
diff --git a/t/mwrap.t b/t/mwrap.t
index 6f99715..ccd739b 100644
--- a/t/mwrap.t
+++ b/t/mwrap.t
@@ -9,7 +9,8 @@ my $dump = "$mwrap_tmp/dump";
  {
          my $env = { MWRAP => "dump_path:$dump,dump_min:10000" };
          my $nr = 1000;
-        mwrap_run('dump test', $env, '-e', '$x = "hello world" x '.$nr);
+        my $script = '$x = "hello world" x '.$nr;
+        mwrap_run('dump test', $env, '-e', $script);
          ok(-s $dump, "dump file written to");
          my $s = slurp($dump);
          truncate($dump, 0);
@@ -23,6 +24,24 @@ my $dump = "$mwrap_tmp/dump";
          } else {
                  fail("$s failed to match $re");
          }
+
+        $env->{MWRAP} = "dump_csv:$dump";
+        mwrap_run('dump_csv test', $env, '-e', $script);
+        ok(-s $dump, "CSV written to path");
+        $s = slurp($dump);
+        truncate($dump, 0);
+        my $nr_comma = ($s =~ tr/,/,/);
+        my $nr_cr = ($s =~ tr/\n/\n/);
+        ok($nr_comma > ($nr_cr * 4), 'CSV has more commas than CR');
+
+        $env->{MWRAP} = 'dump_csv,dump_fd:2';
+        mwrap_run('dump_csv,dump_fd test', $env, '-e', $script);
+        ok(-s $mwrap_err, "CSV written to stderr");
+        $s = slurp($mwrap_err);
+        truncate($mwrap_err, 0);
+        $nr_comma = ($s =~ tr/,/,/);
+        $nr_cr = ($s =~ tr/\n/\n/);
+        ok($nr_comma > ($nr_cr * 4), 'CSV has more commas than CR');
  }
  
  SKIP: { # C++ program which uses malloc via "new"
author	Eric Wong <e@80x24.org>	2023-01-11 01:12:46 +0000
committer	Eric Wong <mwrap-perl@80x24.org>	2023-01-11 04:23:30 +0000
commit	8ce0068f470f3dad3a2920e7fdeedeee235c44eb (patch)
tree	1f476d221daf66d999f35fdc96904ca16e99c7e5
parent	718b313cf3fee3799cdea3ecbbfba8a615066baf (diff)
download	mwrap-8ce0068f470f3dad3a2920e7fdeedeee235c44eb.tar.gz