diff options
author | Eric Wong <e@80x24.org> | 2018-07-01 01:54:15 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2018-07-02 01:24:15 +0000 |
commit | c0a29c0f7c829737782c18a0349b8655b3c0388d (patch) | |
tree | 2de91293179f6c48ce31dcd8514a2c2e4218ebd2 | |
download | mwrap-c0a29c0f7c829737782c18a0349b8655b3c0388d.tar.gz |
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | COPYING | 339 | ||||
-rw-r--r-- | MANIFEST | 10 | ||||
-rw-r--r-- | README | 58 | ||||
-rw-r--r-- | Rakefile | 16 | ||||
-rwxr-xr-x | bin/mwrap | 17 | ||||
-rw-r--r-- | ext/mwrap/extconf.rb | 13 | ||||
-rw-r--r-- | ext/mwrap/mwrap.c | 325 | ||||
-rw-r--r-- | mwrap.gemspec | 29 | ||||
-rw-r--r-- | test/test_mwrap.rb | 98 |
10 files changed, 910 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..851644a --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/tmp +*.o +*.so +/pkg +/*.gem @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. 
And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..fe097e1 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,10 @@ +.gitignore +COPYING +MANIFEST +README +Rakefile +bin/mwrap +ext/mwrap/extconf.rb +ext/mwrap/mwrap.c +mwrap.gemspec +test/test_mwrap.rb @@ -0,0 +1,58 @@ += mwrap - LD_PRELOAD malloc wrapper + line stats for Ruby + +Wraps all malloc, calloc, and realloc calls to trace the Ruby source +location of such calls and bytes allocated at each callsite. This +functionality may change incompatibly or be expanded in the future. + +This is useful for finding malloc hotspots in Ruby code. It does +not track allocation lifetimes, or frees, however. + +Only supports Ruby trunk (2.6.0dev+) on a few platforms: + +* GNU/Linux +* FreeBSD 11 + +== Usage + +It works as an LD_PRELOAD and supplies a mwrap RubyGem executable to +improve ease-of-use. You can set dump_fd: in the MWRAP environment +variable to dump the results to a certain file descriptor at exit: + + MWRAP=dump_fd:2 mwrap RUBY_COMMAND + +You may also set dump_path to append to a log file: + + MWRAP=dump_path:/path/to/log mwrap RUBY_COMMAND + +You may also `require 'mwrap'' in your Ruby code and use +Mwrap.dump and Mwrap.clear. 
+ +== Known problems + +* 32-bit machines are prone to overflow (WONTFIX) + +* Allocations outside of GVL are not tracked (TODO) + +== Mail archives and list: + + https://80x24.org/mwrap-public/ + nntp://80x24.org/inbox.comp.lang.ruby.mwrap + +No subscription will ever be required to post, but HTML mail +will be rejected: + + mwrap-public@80x24.org + +== Hacking + + git clone https://80x24.org/mwrap.git + +Send all patches and pull requests (use "git request-pull" to format) to +the mailing list. We do not use centralized or proprietary messaging +systems. + +== License + +GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt> + +Note: we may depend on 3rd-party LGPL/GPL libraries in future releases diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..50bfa89 --- /dev/null +++ b/Rakefile @@ -0,0 +1,16 @@ +# Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org> +# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt> +require 'rake/testtask' +begin + require 'rake/extensiontask' + Rake::ExtensionTask.new('mwrap') +rescue LoadError + warn 'rake-compiler not available, cross compiling disabled' +end + +Rake::TestTask.new(:test) +task :test => :compile +task :default => :compile + +c_files = File.readlines('MANIFEST').grep(%r{ext/.*\.[ch]$}).map!(&:chomp!) +task 'compile:mwrap' => c_files diff --git a/bin/mwrap b/bin/mwrap new file mode 100755 index 0000000..17e3570 --- /dev/null +++ b/bin/mwrap @@ -0,0 +1,17 @@ +#!/usr/bin/ruby +# frozen_string_literal: true +require 'mwrap' +mwrap_so = $".grep(%r{/mwrap\.so\z})[0] or abort "mwrap.so not loaded" +cur = ENV['LD_PRELOAD'] +ENV['LD_PRELOAD'] = cur ? 
"#{mwrap_so}:#{cur}" : mwrap_so + +# work around close-on-exec by default behavior in Ruby: +opts = {} +if ENV['MWRAP'] =~ /dump_fd:(\d+)/ + dump_fd = $1.to_i + if dump_fd > 2 + dump_io = IO.new(dump_fd) + opts[dump_fd] = dump_io + end +end +exec *ARGV, opts diff --git a/ext/mwrap/extconf.rb b/ext/mwrap/extconf.rb new file mode 100644 index 0000000..dbffd99 --- /dev/null +++ b/ext/mwrap/extconf.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true +# Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org> +# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt> +require 'mkmf' + +have_func 'mempcpy' +if RUBY_PLATFORM =~ /linux/ # should detect glibc + if File.read("/proc/#$$/maps") =~ /\blibjemalloc\./ + $defs << '-DRUBY_USES_JEMALLOC' + end +end +have_library 'dl' +create_makefile 'mwrap' diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c new file mode 100644 index 0000000..a302d8f --- /dev/null +++ b/ext/mwrap/mwrap.c @@ -0,0 +1,325 @@ +/* + * Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org> + * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt> + */ +#include <ruby/ruby.h> +#include <ruby/thread.h> +#include <ruby/util.h> +#include <ruby/st.h> +#include <ruby/io.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <dlfcn.h> +#include <assert.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */ +static int *(*has_gvl_p)(void); +static void *(*real_malloc)(size_t); +static void *(*real_calloc)(size_t, size_t); +static void *(*real_realloc)(void *, size_t); + +/* + * rb_source_location_cstr relies on GET_EC(), and it's possible + * to have a native thread but no EC during the early and late + * (teardown) phases of the Ruby process + */ +static void **ec_loc; + +/* + * we need to fake an OOM condition while dlsym is running, + * as that calls calloc under glibc, but we don't have the + * symbol for the 
jemalloc calloc, yet + */ +# define RETURN_IF_NOT_READY(x) do { \ + if (!x) { \ + errno = ENOMEM; \ + return NULL; \ + } \ +} while (0) + +__attribute__((constructor)) static void resolve_malloc(void) +{ + real_calloc = dlsym(RTLD_NEXT, "calloc"); + real_malloc = dlsym(RTLD_NEXT, "malloc"); + real_realloc = dlsym(RTLD_NEXT, "realloc"); + assert(real_calloc && real_malloc && real_realloc); + + has_gvl_p = dlsym(RTLD_DEFAULT, "ruby_thread_has_gvl_p"); + + /* + * resolve dynamically so it doesn't break when LD_PRELOAD-ed + * into non-Ruby binaries + */ + ec_loc = dlsym(RTLD_DEFAULT, "ruby_current_execution_context_ptr"); +} + +#ifndef HAVE_MEMPCPY +# define mempcpy(dst,src,n) ((char *)memcpy((dst),(src),(n)) + n) +#endif + +/* stolen from glibc: */ +#define RETURN_ADDRESS(nr) \ + __builtin_extract_return_addr(__builtin_return_address(nr)) + +static __thread size_t locating; +static st_table *stats; /* rb_source_location => size */ + +/* bytes allocated outside of GVL */ +static size_t unknown_bytes; + +#define INT2STR_MAX (sizeof(int) == 4 ? 
10 : 19) +static char *int2str(int num, char *dst, size_t * size) +{ + if (num <= 9) { + *size -= 1; + *dst++ = (char)(num + '0'); + return dst; + } else { + char buf[INT2STR_MAX]; + char *end = buf + sizeof(buf); + char *p = end; + size_t adj; + + do { + *size -= 1; + *--p = (char)((num % 10) + '0'); + num /= 10; + } while (num && *size); + + if (!num) { + adj = end - p; + return mempcpy(dst, p, adj); + } + } + return NULL; +} + +static int +update_stat(st_data_t *k, st_data_t *v, st_data_t arg, int existing) +{ + size_t *total = (size_t *) v; + size_t size = arg; + + if (existing) { + *total += size; + } else { + char *key = *(char **)k; + *k = (st_data_t)ruby_strdup(key); + *total = size; + } + return ST_CONTINUE; +} + +static int has_ec_p(void) +{ + return (ec_loc && *ec_loc); +} + +static void update_stats(size_t size, const void *caller) +{ + if (locating++) goto out; /* do not recurse into another *alloc */ + + if (has_gvl_p && has_gvl_p() && has_ec_p()) { + int line; + size_t len; + char *key, *dst; + const char *ptr = rb_source_location_cstr(&line); + size_t int_size = INT2STR_MAX; + + if (!stats) stats = st_init_strtable_with_size(16384); + if (!ptr) goto unknown; + + /* avoid vsnprintf or anything which could call malloc here: */ + len = strlen(ptr); + key = alloca(len + 1 + int_size + 1); + dst = mempcpy(key, ptr, len); + *dst++ = ':'; + dst = int2str(line, dst, &int_size); + if (dst) { + *dst = 0; /* terminate string */ + st_update(stats, (st_data_t)key, + update_stat, (st_data_t)size); + } else { + rb_bug("bad math making key from location %s:%d\n", + ptr, line); + } + } else { /* TODO: do something with caller */ +unknown: + __sync_add_and_fetch(&unknown_bytes, size); + } +out: + --locating; +} + +/* + * Do we care for *memalign? ruby/gc.c uses it in ways this lib + * doesn't care about, but maybe some gems use it, too. 
+ */ +void *malloc(size_t size) +{ + RETURN_IF_NOT_READY(real_malloc); + update_stats(size, RETURN_ADDRESS(0)); + return real_malloc(size); +} + +void *calloc(size_t nmemb, size_t size) +{ + RETURN_IF_NOT_READY(real_calloc); + /* ruby_xcalloc already does overflow checking */ + update_stats(nmemb * size, RETURN_ADDRESS(0)); + return real_calloc(nmemb, size); +} + +void *realloc(void *ptr, size_t size) +{ + RETURN_IF_NOT_READY(real_realloc); + update_stats(size, RETURN_ADDRESS(0)); + return real_realloc(ptr, size); +} + +struct dump_arg { + FILE *fp; + size_t min; +}; + +static int dump_i(const char *key, size_t val, struct dump_arg *a) +{ + if (val > a->min) { + fprintf(a->fp, "%20" PRIuSIZE " %s\n", val, key); + } + + return ST_CONTINUE; +} + +static VALUE dump_to_file(VALUE x) +{ + struct dump_arg *a = (struct dump_arg *)x; + + if (stats) st_foreach(stats, dump_i, (st_data_t) a); + if (unknown_bytes > a->min) { + fprintf(a->fp, "%20" PRIuSIZE " (unknown[%d])\n", + unknown_bytes, getpid()); + } + + return Qnil; +} + +static VALUE dump_ensure(VALUE ignored) +{ + --locating; + return Qfalse; +} + +static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod) +{ + VALUE io, min; + struct dump_arg a; + rb_io_t *fptr; + + rb_scan_args(argc, argv, "02", &io, &min); + + if (NIL_P(io)) + io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr")); + + a.min = NIL_P(min) ? 
0 : NUM2SIZET(min); + io = rb_io_get_write_io(io); + GetOpenFile(io, fptr); + a.fp = rb_io_stdio_file(fptr); + + ++locating; + return rb_ensure(dump_to_file, (VALUE) & a, dump_ensure, Qfalse); +} + +static int clear_i(char *key, size_t val, void *ignored) +{ + xfree(key); + return ST_DELETE; +} + +static VALUE mwrap_clear(VALUE mod) +{ + unknown_bytes = 0; + st_foreach(stats, clear_i, 0); + return Qnil; +} + +void Init_mwrap(void) +{ + VALUE mod = rb_define_module("Mwrap"); + + if (!stats) stats = st_init_strtable_with_size(16384); + + rb_define_singleton_method(mod, "dump", mwrap_dump, -1); + rb_define_singleton_method(mod, "clear", mwrap_clear, 0); +} + +/* rb_cloexec_open isn't usable by non-Ruby processes */ +#ifndef O_CLOEXEC +# define O_CLOEXEC 0 +#endif + +__attribute__ ((destructor)) +static void mwrap_dump_destructor(void) +{ + const char *opt = getenv("MWRAP"); + const char *modes[] = { "a", "a+", "w", "w+", "r+" }; + struct dump_arg a; + size_t i; + int dump_fd; + char *dump_path; + + if (!opt) + return; + + ++locating; + if ((dump_path = strstr(opt, "dump_path:")) && + (dump_path += sizeof("dump_path")) && + *dump_path) { + char *end = strchr(dump_path, ','); + if (end) { + char *tmp = alloca(end - dump_path + 1); + *((char *)mempcpy(tmp, dump_path, end - dump_path)) = 0; + dump_path = tmp; + } + dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT, + 0666); + if (dump_fd < 0) { + fprintf(stderr, "open %s failed: %s\n", dump_path, + strerror(errno)); + goto out; + } + } + else if (!sscanf(opt, "dump_fd:%d", &dump_fd)) + goto out; + + if (!sscanf(opt, "dump_min:%zu", &a.min)) + a.min = 0; + + switch (dump_fd) { + case 0: goto out; + case 1: a.fp = stdout; break; + case 2: a.fp = stderr; break; + default: + if (dump_fd < 0) + goto out; + a.fp = 0; + + for (i = 0; !a.fp && i < 5; i++) + a.fp = fdopen(dump_fd, modes[i]); + + if (!a.fp) { + fprintf(stderr, "failed to open fd=%d: %s\n", + dump_fd, strerror(errno)); + goto out; + } + /* we'll leak 
some memory here, but this is a destructor */ + } + dump_to_file((VALUE)&a); +out: + --locating; +} diff --git a/mwrap.gemspec b/mwrap.gemspec new file mode 100644 index 0000000..7458395 --- /dev/null +++ b/mwrap.gemspec @@ -0,0 +1,29 @@ +git_manifest = `git ls-files 2>/dev/null`.split("\n") +manifest = File.exist?('MANIFEST') ? + File.readlines('MANIFEST').map!(&:chomp).delete_if(&:empty?) : git_manifest +if git_manifest[0] && manifest != git_manifest + tmp = "MANIFEST.#$$.tmp" + File.open(tmp, 'w') { |fp| fp.puts(git_manifest.join("\n")) } + File.rename(tmp, 'MANIFEST') + system('git add MANIFEST') +end + +Gem::Specification.new do |s| + s.name = 'mwrap' + s.version = '0.0.0' + s.homepage = 'https://80x24.org/mwrap.git' + s.authors = ["Ruby hackers"] + s.summary = 'LD_PRELOAD malloc wrapper for Ruby' + s.executables = %w(mwrap) + s.files = manifest + s.description = <<~EOF + EOF + + s.email = %q{e@80x24.org} + s.test_files = Dir['test/test_*.rb'] + s.extensions = %w(ext/mwrap/extconf.rb) + + s.add_development_dependency('test-unit', '~> 3.0') + s.add_development_dependency('rake-compiler', '~> 1.0') + s.licenses = %w(GPL-2.0+) +end diff --git a/test/test_mwrap.rb b/test/test_mwrap.rb new file mode 100644 index 0000000..99073ba --- /dev/null +++ b/test/test_mwrap.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true +# Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org> +# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt> +require 'test/unit' +require 'mwrap' +require 'rbconfig' +require 'tempfile' + +class TestMwrap < Test::Unit::TestCase + RB = "#{RbConfig::CONFIG['bindir']}/#{RbConfig::CONFIG['RUBY_INSTALL_NAME']}" + + mwrap_so = $".grep(%r{/mwrap\.so\z})[0] + env = ENV.to_hash + cur = env['LD_PRELOAD'] + env['LD_PRELOAD'] = cur ? 
"#{mwrap_so}:#{cur}".freeze : mwrap_so + @@env = env.freeze + inc = File.dirname(mwrap_so) + @@cmd = %W(#{RB} -w --disable=gems -I#{inc} -rmwrap).freeze + + def test_mwrap_preload + cmd = @@cmd + %w( + -e ("helloworld"*1000).clear + -e Mwrap.dump + ) + Tempfile.create('junk') do |tmp| + tmp.sync = true + res = system(@@env, *cmd, err: tmp) + assert res, $?.inspect + tmp.rewind + lines = tmp.readlines + line_1 = lines.grep(/\s-e:1\b/)[0].strip + assert_equal '10001', line_1.split(/\s+/)[0] + end + end + + def test_dump_via_destructor + env = @@env.dup + env['MWRAP'] = 'dump_fd:5' + cmd = @@cmd + %w(-e ("0"*10000).clear) + Tempfile.create('junk') do |tmp| + tmp.sync = true + res = system(env, *cmd, { 5 => tmp }) + assert res, $?.inspect + tmp.rewind + assert_match(/\b10001\s+-e:1$/, tmp.read) + + env['MWRAP'] = 'dump_fd:1,dump_min:10000' + tmp.rewind + tmp.truncate(0) + res = system(env, *cmd, { 1 => tmp }) + assert res, $?.inspect + tmp.rewind + assert_match(/\b10001\s+-e:1$/, tmp.read) + + tmp.rewind + tmp.truncate(0) + env['MWRAP'] = "dump_path:#{tmp.path},dump_min:10000" + res = system(env, *cmd) + assert res, $?.inspect + assert_match(/\b10001\s+-e:1$/, tmp.read) + end + end + + def test_clear + cmd = @@cmd + %w( + -e ("0"*10000).clear + -e Mwrap.clear + -e ("0"*20000).clear + -e Mwrap.dump($stdout,9999) + ) + Tempfile.create('junk') do |tmp| + tmp.sync = true + res = system(@@env, *cmd, { 1 => tmp }) + assert res, $?.inspect + tmp.rewind + buf = tmp.read + assert_not_match(/\s+-e:1$/, buf) + assert_match(/\b20001\s+-e:3$/, buf) + end + end + + # make sure we don't break commands spawned by an mwrap-ed Ruby process: + def test_non_ruby_exec + IO.pipe do |r, w| + th = Thread.new { r.read } + Tempfile.create('junk') do |tmp| + tmp.sync = true + env = @@env.merge('MWRAP' => "dump_path:#{tmp.path}") + cmd = %w(perl -e print("HELLO_WORLD")) + res = system(env, *cmd, out: w) + w.close + assert res, $?.inspect + assert_match(/unknown/, tmp.read) + end + assert_equal 
"HELLO_WORLD", th.value + end + end +end |