From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS16276 94.23.0.0/16 X-Spam-Status: No, score=-3.3 required=3.0 tests=AWL,BAYES_00, RCVD_IN_MSPIKE_BL,RCVD_IN_MSPIKE_ZBI,RCVD_IN_XBL,SPF_FAIL,SPF_HELO_FAIL, TO_EQ_FM_DOM_SPF_FAIL shortcircuit=no autolearn=no autolearn_force=no version=3.4.0 Received: from 80x24.org (je-suis-le-relais-tor-de.kylaria.fr [94.23.173.249]) by dcvr.yhbt.net (Postfix) with ESMTP id 800EA1F437 for ; Sat, 28 Jan 2017 00:56:57 +0000 (UTC) From: Eric Wong To: spew@80x24.org Subject: [PATCH] io.c: reduce garbage on write Date: Sat, 28 Jan 2017 00:56:45 +0000 Message-Id: <20170128005645.23620-1-e@80x24.org> List-Id: Introduce rb_str_tmp_frozen_acquire and rb_str_tmp_frozen_release to manage a temporarily frozen string. Reuse one bit for embed length for shared strings as STR_IS_SHARED_M to indicate a string has been shared multiple times. --- internal.h | 2 ++ io.c | 40 ++++++++++++++++++++++++++++++++++++---- string.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 4 deletions(-) diff --git a/internal.h b/internal.h index ce4cc3c..a1def8d 100644 --- a/internal.h +++ b/internal.h @@ -1459,6 +1459,8 @@ VALUE rb_id_quote_unprintable(ID); char *rb_str_fill_terminator(VALUE str, const int termlen); void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen); VALUE rb_str_locktmp_ensure(VALUE str, VALUE (*func)(VALUE), VALUE arg); +VALUE rb_str_tmp_frozen_acquire(VALUE str); +void rb_str_tmp_frozen_release(VALUE str, VALUE tmp); VALUE rb_str_chomp_string(VALUE str, VALUE chomp); #ifdef RUBY_ENCODING_H VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc); diff --git a/io.c b/io.c index 322aabe..d03519b 100644 --- a/io.c +++ b/io.c @@ -1419,10 +1419,40 @@ do_writeconv(VALUE str, rb_io_t *fptr, int *converted) return str; } +struct fwrite_arg { + VALUE orig; + VALUE tmp; + rb_io_t *fptr; + int nosync; +}; + +static VALUE +fwrite_do(VALUE arg) +{ + struct fwrite_arg *fa = (struct fwrite_arg *)arg; + const char *ptr; + long len; + + RSTRING_GETMEM(fa->tmp, ptr, len); + + return (VALUE)io_binwrite(fa->tmp, ptr, len, fa->fptr, fa->nosync); +} + +static VALUE +fwrite_end(VALUE arg) +{ + struct fwrite_arg *fa = (struct fwrite_arg *)arg; + + rb_str_tmp_frozen_release(fa->orig, fa->tmp); + + return Qfalse; +} + static long io_fwrite(VALUE str, rb_io_t *fptr, int nosync) { int converted = 0; + struct fwrite_arg fa; #ifdef _WIN32 if (fptr->mode & FMODE_TTY) { long len = rb_w32_write_console(str, fptr->fd); @@ -1432,11 +1462,13 @@ io_fwrite(VALUE str, rb_io_t *fptr, int nosync) str = do_writeconv(str, fptr, &converted); if (converted) OBJ_FREEZE(str); - else - str = rb_str_new_frozen(str); - return io_binwrite(str, RSTRING_PTR(str), RSTRING_LEN(str), - fptr, nosync); + fa.orig = str; + fa.tmp = rb_str_tmp_frozen_acquire(str); + fa.fptr = fptr; + fa.nosync = nosync; + + return (long)rb_ensure(fwrite_do, (VALUE)&fa, fwrite_end, (VALUE)&fa); } ssize_t diff --git a/string.c b/string.c index b5aae67..6ec37c8 100644 --- a/string.c +++ b/string.c @@ -70,6 +70,7 @@ VALUE rb_cSymbol; * 1: RSTRING_NOEMBED * 2: STR_SHARED (== ELTS_SHARED) * 2-6: RSTRING_EMBED_LEN (5 bits == 32) + * 6: STR_IS_SHARED_M (shared multiple times, only when RSTRING_NOEMBED) * 7: STR_TMPLOCK * 8-9: ENC_CODERANGE (2 bits) * 10-16: ENCODING (7 bits == 128) @@ -79,6 +80,7 @@ VALUE rb_cSymbol; */ #define RUBY_MAX_CHAR_LEN 16 +#define STR_IS_SHARED_M FL_USER6 #define STR_TMPLOCK FL_USER7 #define STR_NOFREE FL_USER18 #define STR_FAKESTR FL_USER19 @@ -150,6 +152,7 @@ VALUE rb_cSymbol; if (!FL_TEST(str, STR_FAKESTR)) { \ RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \ FL_SET((str), STR_SHARED); \ + FL_SET_RAW((shared_str), STR_IS_SHARED_M); \ } \ } while (0) @@ -1127,6 +1130,45 @@ rb_str_new_frozen(VALUE orig) return str; } +VALUE +rb_str_tmp_frozen_acquire(VALUE orig) +{ + VALUE tmp; + + if (OBJ_FROZEN_RAW(orig)) return orig; + + tmp = str_new_frozen(0, orig); + OBJ_INFECT(tmp, orig); + + return tmp; +} + +void +rb_str_tmp_frozen_release(VALUE orig, VALUE tmp) +{ + if (RBASIC_CLASS(tmp) != 0) + return; + + if (FL_TEST_RAW(orig, STR_SHARED) && + !FL_TEST_RAW(orig, STR_TMPLOCK|RUBY_FL_FREEZE)) { + VALUE shared = RSTRING(orig)->as.heap.aux.shared; + + if (shared == tmp && !FL_TEST_RAW(tmp, STR_IS_SHARED_M)) { + FL_UNSET_RAW(orig, STR_SHARED); + assert(RSTRING(orig)->as.heap.ptr == RSTRING(tmp)->as.heap.ptr); + assert(RSTRING(orig)->as.heap.len == RSTRING(tmp)->as.heap.len); + RSTRING(orig)->as.heap.aux.capa = RSTRING(tmp)->as.heap.aux.capa; + RBASIC(orig)->flags |= RBASIC(tmp)->flags & STR_NOFREE; + assert(OBJ_FROZEN_RAW(tmp)); + rb_gc_force_recycle(tmp); + } + } + else if (STR_EMBED_P(tmp)) { + assert(OBJ_FROZEN_RAW(tmp)); + rb_gc_force_recycle(tmp); + } +} + static VALUE str_new_frozen(VALUE klass, VALUE orig) { @@ -1143,6 +1185,7 @@ str_new_frozen(VALUE klass, VALUE orig) assert(!STR_EMBED_P(shared)); assert(OBJ_FROZEN(shared)); + FL_SET_RAW(shared, STR_IS_SHARED_M); if ((ofs > 0) || (rest > 0) || (klass != RBASIC(shared)->klass) || ((RBASIC(shared)->flags ^ RBASIC(orig)->flags) & FL_TAINT) || @@ -1171,6 +1214,7 @@ str_new_frozen(VALUE klass, VALUE orig) RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE; RBASIC(orig)->flags &= ~STR_NOFREE; STR_SET_SHARED(orig, str); + FL_UNSET_RAW(str, STR_IS_SHARED_M); } } -- EW