* [PATCH 0/4] CSV-related improvements
@ 2023-01-11 1:12 Eric Wong
2023-01-11 1:12 ` [PATCH 1/4] support MWRAP=dump_csv:$FILENAME parameter Eric Wong
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-11 1:12 UTC (permalink / raw)
To: mwrap-perl
It should've been CSV from the beginning...
Eric Wong (4):
support MWRAP=dump_csv:$FILENAME parameter
add mwrap-decode-csv tool
%p => PID expansion for dump_path + dump_csv
rewrite README and update manpage to favor CSV
MANIFEST | 1 +
Makefile.PL | 3 +-
Mwrap.xs | 5 +
README | 61 ++++++-----
httpd.h | 217 ++++++++++++++++++++------------------
lib/Devel/Mwrap/Rproxy.pm | 33 +++---
mwrap_core.h | 50 +++++++--
script/mwrap-decode-csv | 57 ++++++++++
script/mwrap-perl | 18 +++-
t/mwrap.t | 36 ++++++-
10 files changed, 327 insertions(+), 154 deletions(-)
create mode 100644 script/mwrap-decode-csv
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/4] support MWRAP=dump_csv:$FILENAME parameter
2023-01-11 1:12 [PATCH 0/4] CSV-related improvements Eric Wong
@ 2023-01-11 1:12 ` Eric Wong
2023-01-11 1:12 ` [PATCH 2/4] add mwrap-decode-csv tool Eric Wong
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-11 1:12 UTC (permalink / raw)
To: mwrap-perl
Just reusing code from httpd.
---
httpd.h | 217 ++++++++++++++++++++++++----------------------
mwrap_core.h | 35 ++++++--
script/mwrap-perl | 12 ++-
t/mwrap.t | 21 ++++-
4 files changed, 173 insertions(+), 112 deletions(-)
diff --git a/httpd.h b/httpd.h
index ef4d83c..9219d36 100644
--- a/httpd.h
+++ b/httpd.h
@@ -504,9 +504,11 @@ static off_t write_loc_name(FILE *fp, const struct src_loc *l)
return end - beg;
}
-static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc, FILE *lp)
+static struct h1_src_loc *
+accumulate(struct mw_fbuf *lb, unsigned long min, size_t *hslc)
{
struct mw_fbuf fb;
+ if (!fbuf_init(lb)) return NULL;
if (!fbuf_init(&fb)) return NULL;
rcu_read_lock();
struct cds_lfht *t = CMM_LOAD_SHARED(totals);
@@ -528,18 +530,23 @@ static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc, FILE *lp)
HUGE_VAL;
hsl.max_life = uatomic_read(&l->max_lifespan);
hsl.sl = l;
- hsl.lname_len = write_loc_name(lp, l);
+ hsl.lname_len = write_loc_name(lb->fp, l);
fwrite(&hsl, sizeof(hsl), 1, fb.fp);
}
rcu_read_unlock();
- struct h1_src_loc *hslv;
- if (fbuf_close(&fb)) {
- hslv = NULL;
- } else {
- *hslc = fb.len / sizeof(*hslv);
- mwrap_assert((fb.len % sizeof(*hslv)) == 0);
- hslv = (struct h1_src_loc *)fb.ptr;
+ if (fbuf_close(&fb) || fbuf_close(lb))
+ return NULL;
+
+ struct h1_src_loc *hslv = (struct h1_src_loc *)fb.ptr;
+ *hslc = fb.len / sizeof(*hslv);
+ mwrap_assert((fb.len % sizeof(*hslv)) == 0);
+ char *n = lb->ptr;
+ for (size_t i = 0; i < *hslc; ++i) {
+ hslv[i].loc_name = n;
+ n += hslv[i].lname_len;
+ if (hslv[i].lname_len < 0)
+ return NULL;
}
return hslv;
}
@@ -609,124 +616,128 @@ static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r)
return h1_200(h1, &html, TYPE_HTML);
}
-/* /$PID/each/$MIN endpoint */
-static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
- unsigned long min, bool csv)
-{
- static const char default_sort[] = "bytes";
- const char *sort;
- size_t sort_len = 0;
+typedef int (*cmp_fn)(const void *, const void *);
- if (!csv) {
- sort = default_sort;
- sort_len = sizeof(default_sort) - 1;
+static cmp_fn write_csv_header(FILE *fp, const char *sort, size_t sort_len)
+{
+ cmp_fn cmp = NULL;
+ for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+ const char *fn = fields[i].fname;
+ if (i)
+ fputc(',', fp);
+ fputs(fn, fp);
+ if (fields[i].flen == sort_len && !memcmp(fn, sort, sort_len))
+ cmp = fields[i].cmp;
}
+ fputc('\n', fp);
+ return cmp;
+}
- if (h1r->qstr && h1r->qlen > 5 && !memcmp(h1r->qstr, "sort=", 5)) {
- sort = h1r->qstr + 5;
- sort_len = h1r->qlen - 5;
+static void write_csv_data(FILE *fp, struct h1_src_loc *hslv, size_t hslc)
+{
+ for (size_t i = 0; i < hslc; i++) {
+ struct h1_src_loc *hsl = &hslv[i];
+
+ fprintf(fp, "%zu,%zu,%zu,%zu,%0.3f,%zu,",
+ hsl->bytes, hsl->allocations, hsl->frees,
+ hsl->live, hsl->mean_life, hsl->max_life);
+ write_q_csv(fp, hsl->loc_name, hsl->lname_len);
+ fputc('\n', fp);
}
+}
- size_t hslc;
+static void *write_csv(FILE *fp, size_t min, const char *sort, size_t sort_len)
+{
AUTO_CLOFREE struct mw_fbuf lb;
- if (!fbuf_init(&lb)) return h1_close(h1);
- AUTO_FREE struct h1_src_loc *hslv = accumulate(min, &hslc, lb.fp);
- if (!hslv)
- return h1_close(h1);
+ size_t hslc;
+ AUTO_FREE struct h1_src_loc *hslv = accumulate(&lb, min, &hslc);
+ if (!hslv) return NULL;
- if (fbuf_close(&lb))
- return h1_close(h1);
+ cmp_fn cmp = write_csv_header(fp, sort, sort_len);
+ if (cmp)
+ qsort(hslv, hslc, sizeof(*hslv), cmp);
+ write_csv_data(fp, hslv, hslc);
+ return fp;
+}
- char *n = lb.ptr;
- for (size_t i = 0; i < hslc; ++i) {
- hslv[i].loc_name = n;
- n += hslv[i].lname_len;
- if (hslv[i].lname_len < 0)
- return h1_close(h1);
+/* /$PID/each/$MIN endpoint */
+static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r,
+ size_t min, bool csv)
+{
+ static const char default_sort[] = "bytes";
+ const char *sort = csv ? NULL : default_sort;
+ size_t sort_len = csv ? 0 : (sizeof(default_sort) - 1);
+
+ if (h1r->qstr && h1r->qlen > 5 && !memcmp(h1r->qstr, "sort=", 5)) {
+ sort = h1r->qstr + 5;
+ sort_len = h1r->qlen - 5;
}
struct mw_fbuf bdy;
FILE *fp = wbuf_init(&bdy);
if (!fp) return h1_close(h1);
-
- if (!csv) {
- unsigned depth = (unsigned)CMM_LOAD_SHARED(bt_req_depth);
- fprintf(fp, "<html><head><title>mwrap each >%lu"
- "</title></head><body><p>mwrap each >%lu "
- "(change `%lu' in URL to adjust filtering) - "
- "MWRAP=bt:%u <a href=\"%lu.csv\">.csv</a>",
- min, min, min, depth, min);
- show_stats(fp);
- /* need borders to distinguish multi-level traces */
- if (depth)
- FPUTS("<table\nborder=1><tr>", fp);
- else /* save screen space if only tracing one line */
- FPUTS("<table><tr>", fp);
+ if (csv) {
+ if (write_csv(fp, min, sort, sort_len))
+ return h1_200(h1, &bdy, TYPE_CSV);
+ return h1_close(h1);
}
- int (*cmp)(const void *, const void *) = NULL;
- if (csv) {
- for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
- const char *fn = fields[i].fname;
- if (i)
- fputc(',', fp);
- fputs(fn, fp);
- if (fields[i].flen == sort_len &&
- !memcmp(fn, sort, sort_len))
- cmp = fields[i].cmp;
- }
- fputc('\n', fp);
- } else {
- for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
- const char *fn = fields[i].fname;
- FPUTS("<th>", fp);
- if (fields[i].flen == sort_len &&
- !memcmp(fn, sort, sort_len)) {
- cmp = fields[i].cmp;
- fprintf(fp, "<b>%s</b>", fields[i].fname);
- } else {
- fprintf(fp, "<a\nhref=\"./%lu?sort=%s\">%s</a>",
- min, fn, fn);
- }
- FPUTS("</th>", fp);
+ size_t hslc;
+ AUTO_CLOFREE struct mw_fbuf lb;
+ AUTO_FREE struct h1_src_loc *hslv = accumulate(&lb, min, &hslc);
+ if (!hslv)
+ return h1_close(h1);
+
+ unsigned depth = (unsigned)CMM_LOAD_SHARED(bt_req_depth);
+ fprintf(fp, "<html><head><title>mwrap each >%lu"
+ "</title></head><body><p>mwrap each >%lu "
+ "(change `%lu' in URL to adjust filtering) - "
+ "MWRAP=bt:%u <a href=\"%lu.csv\">.csv</a>",
+ min, min, min, depth, min);
+ show_stats(fp);
+ /* need borders to distinguish multi-level traces */
+ if (depth)
+ FPUTS("<table\nborder=1><tr>", fp);
+ else /* save screen space if only tracing one line */
+ FPUTS("<table><tr>", fp);
+ cmp_fn cmp = NULL;
+ for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) {
+ const char *fn = fields[i].fname;
+ FPUTS("<th>", fp);
+ if (fields[i].flen == sort_len &&
+ !memcmp(fn, sort, sort_len)) {
+ cmp = fields[i].cmp;
+ fprintf(fp, "<b>%s</b>", fields[i].fname);
+ } else {
+ fprintf(fp, "<a\nhref=\"./%lu?sort=%s\">%s</a>",
+ min, fn, fn);
}
+ FPUTS("</th>", fp);
}
- if (!csv)
- FPUTS("</tr>", fp);
+ FPUTS("</tr>", fp);
if (cmp)
qsort(hslv, hslc, sizeof(*hslv), cmp);
- else if (!csv)
+ else
FPUTS("<tr><td>sort= not understood</td></tr>", fp);
- if (csv) {
- for (size_t i = 0; i < hslc; i++) {
- struct h1_src_loc *hsl = &hslv[i];
- fprintf(fp, "%zu,%zu,%zu,%zu,%0.3f,%zu,",
- hsl->bytes, hsl->allocations, hsl->frees,
- hsl->live, hsl->mean_life, hsl->max_life);
- write_q_csv(fp, hsl->loc_name, hsl->lname_len);
- fputc('\n', fp);
- }
- } else {
- for (size_t i = 0; i < hslc; i++) {
- struct h1_src_loc *hsl = &hslv[i];
+ for (size_t i = 0; i < hslc; i++) {
+ struct h1_src_loc *hsl = &hslv[i];
- fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
- "<td>%zu</td><td>%0.3f</td><td>%zu</td>",
- hsl->bytes, hsl->allocations, hsl->frees,
- hsl->live, hsl->mean_life, hsl->max_life);
- FPUTS("<td><a\nhref=\"../at/", fp);
+ fprintf(fp, "<tr><td>%zu</td><td>%zu</td><td>%zu</td>"
+ "<td>%zu</td><td>%0.3f</td><td>%zu</td>",
+ hsl->bytes, hsl->allocations, hsl->frees,
+ hsl->live, hsl->mean_life, hsl->max_life);
+ FPUTS("<td><a\nhref=\"../at/", fp);
- write_b64_url(fp, src_loc_hash_tip(hsl->sl),
- src_loc_hash_len(hsl->sl));
+ write_b64_url(fp, src_loc_hash_tip(hsl->sl),
+ src_loc_hash_len(hsl->sl));
- FPUTS("\">", fp);
- write_html(fp, hsl->loc_name, hsl->lname_len);
- FPUTS("</a></td></tr>", fp);
- }
- FPUTS("</table></body></html>", fp);
+ FPUTS("\">", fp);
+ write_html(fp, hsl->loc_name, hsl->lname_len);
+ FPUTS("</a></td></tr>", fp);
}
- return h1_200(h1, &bdy, csv ? TYPE_CSV : TYPE_HTML);
+ FPUTS("</table></body></html>", fp);
+ return h1_200(h1, &bdy, TYPE_HTML);
}
/* /$PID/ root endpoint */
@@ -781,7 +792,7 @@ static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r)
if ((c = PATH_SKIP(h1r, "/each/"))) {
errno = 0;
char *e;
- unsigned long min = strtoul(c, &e, 10);
+ size_t min = (size_t)strtoul(c, &e, 10);
if (!errno) {
if (*e == ' ' || *e == '?')
return each_gt(h1, h1r, min, false);
diff --git a/mwrap_core.h b/mwrap_core.h
index deb3bb3..fff0538 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -732,6 +732,7 @@ enomem:
struct dump_arg {
FILE *fp;
size_t min;
+ bool dump_csv;
};
char **bt_syms(void * const *addrlist, uint32_t size)
@@ -754,12 +755,16 @@ static void cleanup_free(void *any)
free(*p);
}
+static void *write_csv(FILE *, size_t min, const char *sort, size_t sort_len);
static void *dump_to_file(struct dump_arg *a)
{
struct cds_lfht_iter iter;
struct src_loc *l;
struct cds_lfht *t;
+ if (a->dump_csv)
+ return write_csv(a->fp, a->min, NULL, 0);
+
++locating;
rcu_read_lock();
t = CMM_LOAD_SHARED(totals);
@@ -857,7 +862,7 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
{
const char *opt = getenv("MWRAP");
const char *modes[] = { "a", "a+", "w", "w+", "r+" };
- struct dump_arg a = { .min = 0 };
+ struct dump_arg a = { .min = 0, .dump_csv = false };
size_t i;
int dump_fd;
char *dump_path;
@@ -870,9 +875,24 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
return;
++locating;
- if ((dump_path = strstr(opt, "dump_path:")) &&
- (dump_path += sizeof("dump_path")) &&
- *dump_path) {
+
+ /* parse dump_csv:$PATHNAME */
+ if ((dump_path = strstr(opt, "dump_csv:"))) {
+ dump_path += sizeof("dump_csv");
+ if (!*dump_path)
+ dump_path = NULL;
+ else
+ a.dump_csv = true;
+ }
+ if (!dump_path) {
+ /* parse dump_path:$PATHNAME */
+ if ((dump_path = strstr(opt, "dump_path:"))) {
+ dump_path += sizeof("dump_path");
+ if (!*dump_path)
+ dump_path = NULL;
+ }
+ }
+ if (dump_path) {
char *end = strchr(dump_path, ',');
char buf[PATH_MAX];
if (end) {
@@ -887,10 +907,13 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
fprintf(stderr, "open %s failed: %m\n", dump_path);
goto out;
}
- }
- else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
+ } else if ((s = strstr(opt, "dump_fd:")) &&
+ !sscanf(s, "dump_fd:%d", &dump_fd))
goto out;
+ /* allow dump_csv standalone for dump_fd */
+ if (!a.dump_csv && strstr(opt, "dump_csv"))
+ a.dump_csv = true;
if ((s = strstr(opt, "dump_min:")))
sscanf(s, "dump_min:%zu", &a.min);
diff --git a/script/mwrap-perl b/script/mwrap-perl
index 182b0bd..eb29176 100644
--- a/script/mwrap-perl
+++ b/script/mwrap-perl
@@ -76,12 +76,20 @@ Dumps the output at exit to a given filename:
total_bytes call_count location
-In the future, dumping to a self-describing CSV will be supported.
-
=item dump_fd:$DESCRIPTOR
As with dump_path, but dumps the output to a given file descriptor.
+=item dump_csv:$FILENAME
+
+Dump CSV to the given filename.
+
+This output matches the HTTP server output and includes column headers,
+but is subject to change in future releases.
+
+C<dump_csv> without the C<:> may also be used in conjunction with
+C<dump_fd>, such as C<MWRAP=dump_fd:2,dump_csv>.
+
=back
=head1 HTTP POST API
diff --git a/t/mwrap.t b/t/mwrap.t
index 6f99715..ccd739b 100644
--- a/t/mwrap.t
+++ b/t/mwrap.t
@@ -9,7 +9,8 @@ my $dump = "$mwrap_tmp/dump";
{
my $env = { MWRAP => "dump_path:$dump,dump_min:10000" };
my $nr = 1000;
- mwrap_run('dump test', $env, '-e', '$x = "hello world" x '.$nr);
+ my $script = '$x = "hello world" x '.$nr;
+ mwrap_run('dump test', $env, '-e', $script);
ok(-s $dump, "dump file written to");
my $s = slurp($dump);
truncate($dump, 0);
@@ -23,6 +24,24 @@ my $dump = "$mwrap_tmp/dump";
} else {
fail("$s failed to match $re");
}
+
+ $env->{MWRAP} = "dump_csv:$dump";
+ mwrap_run('dump_csv test', $env, '-e', $script);
+ ok(-s $dump, "CSV written to path");
+ $s = slurp($dump);
+ truncate($dump, 0);
+ my $nr_comma = ($s =~ tr/,/,/);
+ my $nr_cr = ($s =~ tr/\n/\n/);
+ ok($nr_comma > ($nr_cr * 4), 'CSV has more commas than CR');
+
+ $env->{MWRAP} = 'dump_csv,dump_fd:2';
+ mwrap_run('dump_csv,dump_fd test', $env, '-e', $script);
+ ok(-s $mwrap_err, "CSV written to stderr");
+ $s = slurp($mwrap_err);
+ truncate($mwrap_err, 0);
+ $nr_comma = ($s =~ tr/,/,/);
+ $nr_cr = ($s =~ tr/\n/\n/);
+ ok($nr_comma > ($nr_cr * 4), 'CSV has more commas than CR');
}
SKIP: { # C++ program which uses malloc via "new"
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/4] add mwrap-decode-csv tool
2023-01-11 1:12 [PATCH 0/4] CSV-related improvements Eric Wong
2023-01-11 1:12 ` [PATCH 1/4] support MWRAP=dump_csv:$FILENAME parameter Eric Wong
@ 2023-01-11 1:12 ` Eric Wong
2023-01-11 1:12 ` [PATCH 3/4] %p => PID expansion for dump_path + dump_csv Eric Wong
2023-01-11 1:12 ` [PATCH 4/4] rewrite README and update manpage to favor CSV Eric Wong
3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-11 1:12 UTC (permalink / raw)
To: mwrap-perl
This is a useful companion to the dump_csv: directive.
It also fixes a bug where HTML escaping was unnecessarily done
to the CSV output by -rproxy.
---
MANIFEST | 1 +
Makefile.PL | 3 ++-
lib/Devel/Mwrap/Rproxy.pm | 33 +++++++++++++----------
script/mwrap-decode-csv | 57 +++++++++++++++++++++++++++++++++++++++
4 files changed, 79 insertions(+), 15 deletions(-)
create mode 100644 script/mwrap-decode-csv
diff --git a/MANIFEST b/MANIFEST
index 096cec9..cf42979 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -20,6 +20,7 @@ mymalloc.h
picohttpparser.h
picohttpparser_c.h
ppport.h
+script/mwrap-decode-csv
script/mwrap-perl
script/mwrap-rproxy
t/httpd-unit.t
diff --git a/Makefile.PL b/Makefile.PL
index dadf80b..41e8f03 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -83,7 +83,8 @@ push @writemakefile_args, (
CCFLAGS => "$Config{ccflags} $ccflags",
PREREQ_PM => {},
ABSTRACT_FROM => 'lib/Devel/Mwrap.pm',
- EXE_FILES => [qw(script/mwrap-perl script/mwrap-rproxy)],
+ EXE_FILES => [qw(script/mwrap-perl script/mwrap-rproxy
+ script/mwrap-decode-csv)],
AUTHOR => 'mwrap hackers <mwrap-perl@80x24.org>',
LIBS => $LIBS, # e.g. -lurcu-cds
LICENSE => 'gpl_2', # GPL-3.0+, CPAN::Meta::Spec limitation
diff --git a/lib/Devel/Mwrap/Rproxy.pm b/lib/Devel/Mwrap/Rproxy.pm
index d5a9d9d..de65685 100644
--- a/lib/Devel/Mwrap/Rproxy.pm
+++ b/lib/Devel/Mwrap/Rproxy.pm
@@ -11,9 +11,8 @@ package Devel::Mwrap::Rproxy;
use v5.12; # strict
use Fcntl qw(SEEK_SET);
use IO::Socket::UNIX;
-use Plack::Util;
-sub new { bless { socket_dir => $_[1]}, $_[0] }
+sub new { require Plack::Util; bless { socket_dir => $_[1]}, $_[0] }
sub r {
[ $_[0], [
@@ -104,13 +103,26 @@ sub a2l {
$a2l ? do {
chomp(my $line = $a2l->lookup($addr));
$line =~ s/\Q?? at ??:0\E//; # FreeBSD
- $line = Plack::Util::encode_html($line);
$line =~ /\?\?/ ? "$line $exe($addr)" :
($line =~ /\S/ ? $line : "$exe($addr)");
} : "$exe($addr)"
}
}
+sub decode_csv {
+ my ($in, $out) = @_;
+ while (<$in>) {
+ s/\\n/\0\0/g;
+ s!(["\0])
+ ([^\("\0]+) # exe
+ \(([^\)"\0]+)\) # addr
+ (["\0])!
+ $1.a2l($2,$3).$4!gex;
+ s/\0\0/\\n/g;
+ $out->write($_);
+ }
+}
+
sub call { # PSGI entry point
my ($self, $env) = @_;
my $uri = $env->{REQUEST_URI};
@@ -152,23 +164,16 @@ sub call { # PSGI entry point
local %addr2line;
# extract executable|library(address)
if ($csv) {
- while (<$c>) {
- s/\\n/\0\0/g;
- s!(["\0])
- ([^\("\0]+) # exe
- \(([^\)"\0]+)\) # addr
- (["\0])!
- $1.a2l($2,$3).$4!gex;
- s/\0\0/\\n/g;
- $http_out->write($_);
- }
+ decode_csv($c, $http_out);
} else {
while (<$c>) {
s!>
([^\(<]+) # exe
\(([^\)<]+)\) # addr
<!
- '>'.a2l($1,$2).'<'!gex;
+ '>'.Plack::Util::encode_html(
+ a2l($1,$2)).
+ '<'!gex;
$http_out->write($_);
}
}
diff --git a/script/mwrap-decode-csv b/script/mwrap-decode-csv
new file mode 100644
index 0000000..5bbc171
--- /dev/null
+++ b/script/mwrap-decode-csv
@@ -0,0 +1,57 @@
+#!perl -w
+# Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+# addr2line decoder for the output of MWRAP=dump_csv:$FILENAME
+use v5.12;
+use Devel::Mwrap::Rproxy;
+use IO::Handle;
+Devel::Mwrap::Rproxy::decode_csv(*STDIN{IO}, *STDOUT{IO});
+__END__
+=head1 NAME
+
+mwrap-decode-csv - decode non-Perl addresses from mwrap CSV dumps
+
+=head1 SYNOPSIS
+
+ MWRAP=dump_csv:$FILENAME,bt:2 mwrap-perl COMMAND...
+
+ mwrap-decode-csv <$FILENAME
+
+=head1 DESCRIPTION
+
+mwrap-decode-csv is a convenient wrapper for L<addr2line(1)>
+for decoding C backtraces from CSV files.
+
+It reads the CSV via standard input, and emits to standard output.
+
+It expects CSV files emitted by L<mwrap-perl(1p)> via
+C<MWRAP=dump_csv:$FILENAME> or retrieved directly via C<curl --unix-socket>.
+
+It is not needed for CSVs retrieved via L<mwrap-rproxy(1p)>,
+since mwrap-rproxy already performs the same function as mwrap-decode-csv.
+
+To get useful C backtraces of Perl programs, the C<MWRAP=bt:$DEPTH>
+directive must be used (carefully). See L<mwrap-perl(1p)>.
+
+addr2line from GNU binutils 2.39+ (August 2022) is recommended to
+support C<SYMBOL+OFFSET> addresses.
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:mwrap-perl@80x24.org>
+
+Mail archives are hosted at L<https://80x24.org/mwrap-perl/>
+
+=head1 COPYRIGHT
+
+Copyright all contributors L<mailto:mwrap-perl@80x24.org>
+
+License: GPL-3.0+ L<https://www.gnu.org/licenses/gpl-3.0.txt>
+
+Source code is at L<https://80x24.org/mwrap-perl.git/>
+
+=head1 SEE ALSO
+
+L<mwrap-perl(1p)>, L<mwrap-rproxy(1p)>
+
+=cut
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/4] %p => PID expansion for dump_path + dump_csv
2023-01-11 1:12 [PATCH 0/4] CSV-related improvements Eric Wong
2023-01-11 1:12 ` [PATCH 1/4] support MWRAP=dump_csv:$FILENAME parameter Eric Wong
2023-01-11 1:12 ` [PATCH 2/4] add mwrap-decode-csv tool Eric Wong
@ 2023-01-11 1:12 ` Eric Wong
2023-01-11 1:12 ` [PATCH 4/4] rewrite README and update manpage to favor CSV Eric Wong
3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-11 1:12 UTC (permalink / raw)
To: mwrap-perl
This makes it possible to dump per-PID files for processes which
fork. `%p' matches what the Linux kernel.core_pattern sysctl
(/proc/sys/kernel/core_pattern) understands.
---
mwrap_core.h | 15 +++++++++++++++
script/mwrap-perl | 6 ++++++
t/mwrap.t | 15 +++++++++++++++
3 files changed, 36 insertions(+)
diff --git a/mwrap_core.h b/mwrap_core.h
index fff0538..86e4498 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -895,12 +895,27 @@ __attribute__ ((destructor)) static void mwrap_dtor(void)
if (dump_path) {
char *end = strchr(dump_path, ',');
char buf[PATH_MAX];
+ AUTO_FREE char *pid_path = NULL;
if (end) {
mwrap_assert((end - dump_path) < (intptr_t)sizeof(buf));
end = mempcpy(buf, dump_path, end - dump_path);
*end = 0;
dump_path = buf;
}
+
+ /* %p => PID expansion (Linux core_pattern uses %p, too) */
+ if ((s = strchr(dump_path, '%')) && s[1] == 'p' &&
+ /* don't allow injecting extra formats: */
+ !strchr(s + 2, '%')) {
+ s[1] = 'd'; /* s/%p/%d/ to make asprintf happy */
+ int n = asprintf(&pid_path, dump_path, (int)getpid());
+ if (n < 0)
+ fprintf(stderr,
+ "asprintf failed: %m, dumping to %s\n",
+ dump_path);
+ else
+ dump_path = pid_path;
+ }
dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
0666);
if (dump_fd < 0) {
diff --git a/script/mwrap-perl b/script/mwrap-perl
index eb29176..371aee6 100644
--- a/script/mwrap-perl
+++ b/script/mwrap-perl
@@ -76,6 +76,10 @@ Dumps the output at exit to a given filename:
total_bytes call_count location
+C<$FILENAME> may contain C<%p> where C<%p> is a placeholder for
+the PID being dumped. No other use of C<%> is accepted, and
+multiple C<%> means all C<%> (including C<%p>) are handled as-is.
+
=item dump_fd:$DESCRIPTOR
As with dump_path, but dumps the output to a given file descriptor.
@@ -90,6 +94,8 @@ but is subject to change in future releases.
C<dump_csv> without the C<:> may also be used in conjunction with
C<dump_fd>, such as C<MWRAP=dump_fd:2,dump_csv>.
+Expands C<%p> to the PID in C<$FILENAME> as described for C<dump_path:>
+
=back
=head1 HTTP POST API
diff --git a/t/mwrap.t b/t/mwrap.t
index ccd739b..783f6e7 100644
--- a/t/mwrap.t
+++ b/t/mwrap.t
@@ -42,6 +42,21 @@ my $dump = "$mwrap_tmp/dump";
$nr_comma = ($s =~ tr/,/,/);
$nr_cr = ($s =~ tr/\n/\n/);
ok($nr_comma > ($nr_cr * 4), 'CSV has more commas than CR');
+
+ $env->{MWRAP} = "dump_path:$dump.%p";
+ mwrap_run('dump_path PID expansion', $env, '-e', $script);
+ my @d = grep(/\.\d+\z/, glob("$dump.*"));
+ is(scalar(@d), 1, 'got PID file') or diag explain([glob("$dump*")]);
+ unlink(@d) or BAIL_OUT "unlink: $!";
+
+ # don't allow injecting random formats
+ for my $fmt ('%p.%m', '%m.%p') {
+ my $fn = $dump.$fmt;
+ $env->{MWRAP} = "dump_path:$fn";
+ mwrap_run("PID expansion fails on $fmt", $env, '-e', $script);
+ ok($fn, "$fmt used as-is");
+ unlink($fn) or BAIL_OUT "unlink: $!";
+ }
}
SKIP: { # C++ program which uses malloc via "new"
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 4/4] rewrite README and update manpage to favor CSV
2023-01-11 1:12 [PATCH 0/4] CSV-related improvements Eric Wong
` (2 preceding siblings ...)
2023-01-11 1:12 ` [PATCH 3/4] %p => PID expansion for dump_path + dump_csv Eric Wong
@ 2023-01-11 1:12 ` Eric Wong
3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-11 1:12 UTC (permalink / raw)
To: mwrap-perl
The CSV format has far more info, is self-describing, and
Devel::Mwrap->dump never existed. I'm not sure if I care to
support a Perl API now that the AF_UNIX HTTP interface exists.
---
Mwrap.xs | 5 ++++
README | 61 ++++++++++++++++++++++++++---------------------
script/mwrap-perl | 26 ++++++++++----------
3 files changed, 52 insertions(+), 40 deletions(-)
diff --git a/Mwrap.xs b/Mwrap.xs
index 4d4c996..cbb7d15 100644
--- a/Mwrap.xs
+++ b/Mwrap.xs
@@ -6,6 +6,11 @@
#define MWRAP_PERL 1
#include "mwrap_core.h"
+/*
+ * The Devel::Mwrap Perl API is probably no longer useful now that
+ * the AF_UNIX HTTP interface exists. It'll probably remain undocumented.
+ */
+
/*
* I hate typedefs, especially when they're hiding the fact that there's
* a pointer, but XS needs this, apparently, and it does s/__/::/g
diff --git a/README b/README
index 1fa1dfb..88abe7d 100644
--- a/README
+++ b/README
@@ -4,11 +4,17 @@ Devel::Mwrap is designed to answer the question:
Which lines of Perl are hitting malloc the most?
-Devel::Mwrap wraps all malloc-family calls to trace the Perl source
-location of such calls and bytes allocated at each callsite. It
-can also function as a leak detector and show live allocations
-at every call site. Depending on your application and workload,
-the overhead is roughly a 50%-100% increase memory and runtime.
+Devel::Mwrap wraps all malloc-family calls to trace the Perl
+source location of such calls and bytes allocated at each
+callsite. It is primarily designed to identify high memory use,
+but may function as a leak detector as it can show live
+allocations at every call site. Depending on your application
+and workload, the overhead is roughly a 50%-100% increase in memory
+and runtime.
+
+It also gives configurable backtraces of all dynamically-linked
+malloc callsites for any program where backtrace(3) works,
+including programs not linked to Perl.
It is thread-safe and requires the concurrent lock-free hash table
from the Userspace RCU project: https://liburcu.org/
@@ -31,31 +37,30 @@ See `INSTALL' document
== Usage
-Devel::Mwrap works as an LD_PRELOAD and supplies a mwrap-perl script to
-improve ease-of-use. You can set dump_path: in the MWRAP environment
-variable to append the results to a log file:
+Devel::Mwrap works as an LD_PRELOAD and supplies a mwrap-perl wrapper to
+improve ease-of-use. You can set dump_csv: in the MWRAP environment
+variable to append the results to a CSV file:
- MWRAP=dump_path:/path/to/log mwrap-perl PERL_COMMAND
+ MWRAP=dump_csv:/path/to/csv.%p mwrap-perl COMMAND
- # And to display the locations with the most allocations:
- sort -k1,1rn </path/to/log | $PAGER
+For long-running processes, you can use the AF_UNIX HTTP interface:
-You may also `use Devel::Mwrap' in your Perl code and use
-Devel::Mwrap->dump, Devel::Mwrap->reset, Devel::Mwrap->each, etc.
+ MWRAP=socket_dir:/some/dir mwrap-perl COMMAND
-However, Devel::Mwrap MUST be loaded via LD_PRELOAD to have any
-effect in tracking malloc use. However, it is safe to keep
-"use Devel::Mwrap" in performance-critical deployments,
-as overhead is only incurred when used as an LD_PRELOAD.
+And connect via `curl --unix-socket /some/dir/$PID.sock' or `mwrap-rproxy'.
+See mwrap-rproxy(1p) and mwrap-perl(1p) manpages for more info.
-The output of the Devel::Mwrap->dump is a text file with 3 columns:
+== Compared to other tools
- total_bytes call_count location
+* mwrap-perl knows about Perl code, and an `mwrap' RubyGem exists, too
-Where location is a Perl source location or an address retrieved
-by backtrace_symbols(3). It is recommended to use the sort(1)
-command on either of the first two columns to find the hottest
-malloc locations.
+* mwrap does not catch memory errors; use ASan, valgrind, or similar
+
+* mwrap is reasonably fast, fast enough for the author to run on
+ production-facing Perl daemons
+
+* the AF_UNIX HTTP interface allows inspecting live processes without
+ interruption instead of waiting for an exit dump
== Known problems
@@ -66,6 +71,8 @@ malloc locations.
* Perl source files over 16.7 million lines long are not supported :P
+* large C backtraces (off by default) are expensive for large programs
+
== Public mail archives (HTTP, Atom feeds, IMAP mailbox, NNTP group, POP3):
https://80x24.org/mwrap-perl/
@@ -73,10 +80,10 @@ malloc locations.
nntps://80x24.org/inbox.comp.lang.perl.mwrap
https://80x24.org/mwrap-perl/_/text/help/#pop3
-No subscription nor real identities will ever be required to obtain support,
-but HTML mail is rejected. Memory usage reductions start with you;
-only send plain-text mail to us and do not top-post. HTML mail and
-top-posting costs everybody memory and bandwidth.
+No subscription nor real identities will ever be required to obtain
+support or contribute, but HTML mail is rejected. Memory usage reductions
+start with you; only send plain-text mail to us and do not top-post.
+HTML mail and top-posting costs everybody memory and bandwidth.
mwrap-perl@80x24.org
diff --git a/script/mwrap-perl b/script/mwrap-perl
index 371aee6..cf88375 100644
--- a/script/mwrap-perl
+++ b/script/mwrap-perl
@@ -70,31 +70,31 @@ This may be changed via POST request (see below).
Default: 0
-=item dump_path:$FILENAME
+=item dump_csv:$FILENAME
-Dumps the output at exit to a given filename:
+Dump CSV to the given filename.
- total_bytes call_count location
+This output matches the HTTP server output and includes column headers,
+but is subject to change in future releases.
+
+C<dump_csv> without the C<:> may also be used in conjunction with
+C<dump_fd>, such as C<MWRAP=dump_fd:2,dump_csv>.
C<$FILENAME> may contain C<%p> where C<%p> is a placeholder for
the PID being dumped. No other use of C<%> is accepted, and
multiple C<%> means all C<%> (including C<%p>) are handled as-is.
-=item dump_fd:$DESCRIPTOR
-
-As with dump_path, but dumps the output to a given file descriptor.
+=item dump_path:$FILENAME
-=item dump_csv:$FILENAME
+Gives a simpler, legacy output compatible with the old Ruby version:
-Dump CSV to the given filename.
+ total_bytes call_count location
-This output matches the HTTP server output and includes column headers,
-but is subject to change in future releases.
+Expands C<%p> to the PID in C<$FILENAME> as described for C<dump_csv>
-C<dump_csv> without the C<:> may also be used in conjunction with
-C<dump_fd>, such as C<MWRAP=dump_fd:2,dump_csv>.
+=item dump_fd:$DESCRIPTOR
-Expands C<%p> to the PID in C<$FILENAME> as described for C<dump_path:>
+As with dump_path, but dumps the output to a given file descriptor.
=back
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-01-11 1:12 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-11 1:12 [PATCH 0/4] CSV-related improvements Eric Wong
2023-01-11 1:12 ` [PATCH 1/4] support MWRAP=dump_csv:$FILENAME parameter Eric Wong
2023-01-11 1:12 ` [PATCH 2/4] add mwrap-decode-csv tool Eric Wong
2023-01-11 1:12 ` [PATCH 3/4] %p => PID expansion for dump_path + dump_csv Eric Wong
2023-01-11 1:12 ` [PATCH 4/4] rewrite README and update manpage to favor CSV Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/mwrap-perl.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).