From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS8100 96.47.226.0/23 X-Spam-Status: No, score=-1.3 required=3.0 tests=AWL,BAYES_00,BODY_8BITS, RCVD_IN_XBL,URIBL_BLOCKED shortcircuit=no autolearn=no version=3.3.2 X-Original-To: spew@80x24.org Received: from 80x24.org (wannabe.torservers.net [96.47.226.22]) by dcvr.yhbt.net (Postfix) with ESMTP id B82A51FA15 for ; Tue, 14 Oct 2014 02:42:37 +0000 (UTC) From: Eric Wong To: spew@80x24.org Subject: [PATCH] opt_str_lit: one instruction, many optimizations Date: Tue, 14 Oct 2014 02:42:35 +0000 Message-Id: X-Mailer: git-send-email 2.1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit List-Id: This is a squash of the following commits in the "opt_str_lit-v2" branch of git://bogomips.org/ruby.git It optimizes away object allocation for string literals in the following cases: * "lit" % obj * str << "lit" * "lit" + str * str + "lit" * "lit" * num * "lit" === obj * obj === "lit" * "lit" == str * str == "lit" * "lit" != str * str != "lit" * str.(gsub,sub,tr,tr_s)(!)(any, "lit_b") Full commit logs are in the git repository above, and also viewable with $BROWSER at: http://bogomips.org/ruby.git/log/?h=opt_str_lit-v2 --- benchmark/bm_vm2_gsub_bang_lit.rb | 6 + benchmark/bm_vm2_gsub_bang_re.rb | 6 + benchmark/bm_vm2_gsub_re.rb | 6 + benchmark/bm_vm2_hash_aref_lit.rb | 6 + benchmark/bm_vm2_hash_aset_lit.rb | 6 + benchmark/bm_vm2_strcat.rb | 7 ++ benchmark/bm_vm2_streq1.rb | 6 + benchmark/bm_vm2_streq2.rb | 6 + benchmark/bm_vm2_streqq1.rb | 6 + benchmark/bm_vm2_streqq2.rb | 6 + benchmark/bm_vm2_strfmt.rb | 5 + benchmark/bm_vm2_strplus1.rb | 6 + benchmark/bm_vm2_strplus2.rb | 6 + benchmark/bm_vm2_tr_bang.rb | 7 ++ common.mk | 18 ++- compile.c | 255 ++++++++++++++++++++++++++++++++------ defs/id.def | 9 ++ defs/opt_method.def | 57 +++++++++ insns.def | 219 +++++++++++++++----------------- 
template/opt_method.h.tmpl | 71 +++++++++++ template/opt_method.inc.tmpl | 49 ++++++++ test/-ext-/symbol/test_type.rb | 1 + test/objspace/test_objspace.rb | 1 + test/ruby/envutil.rb | 10 ++ test/ruby/test_hash.rb | 2 + test/ruby/test_iseq.rb | 1 + test/ruby/test_string.rb | 193 +++++++++++++++++++++++++++++ vm.c | 67 ++-------- vm_core.h | 44 +------ vm_insnhelper.c | 8 +- vm_insnhelper.h | 25 +++- 31 files changed, 853 insertions(+), 262 deletions(-) diff --git a/benchmark/bm_vm2_gsub_bang_lit.rb b/benchmark/bm_vm2_gsub_bang_lit.rb new file mode 100644 index 0000000..9251fb1 --- /dev/null +++ b/benchmark/bm_vm2_gsub_bang_lit.rb @@ -0,0 +1,6 @@ +i = 0 +str = "" +while i<6_000_000 # benchmark loop 2 + i += 1 + str.gsub!("nomatch", "") +end diff --git a/benchmark/bm_vm2_gsub_bang_re.rb b/benchmark/bm_vm2_gsub_bang_re.rb new file mode 100644 index 0000000..e5fc9ea --- /dev/null +++ b/benchmark/bm_vm2_gsub_bang_re.rb @@ -0,0 +1,6 @@ +i = 0 +str = "" +while i<6_000_000 # benchmark loop 2 + i += 1 + str.gsub!(/a/, "") +end diff --git a/benchmark/bm_vm2_gsub_re.rb b/benchmark/bm_vm2_gsub_re.rb new file mode 100644 index 0000000..606f247 --- /dev/null +++ b/benchmark/bm_vm2_gsub_re.rb @@ -0,0 +1,6 @@ +i = 0 +str = "" +while i<6_000_000 # benchmark loop 2 + i += 1 + str.gsub(/a/, "") +end diff --git a/benchmark/bm_vm2_hash_aref_lit.rb b/benchmark/bm_vm2_hash_aref_lit.rb new file mode 100644 index 0000000..a6d4d12 --- /dev/null +++ b/benchmark/bm_vm2_hash_aref_lit.rb @@ -0,0 +1,6 @@ +h = { "foo" => nil } +i = 0 +while i<6_000_000 # while loop 2 + i += 1 + h["foo"] +end diff --git a/benchmark/bm_vm2_hash_aset_lit.rb b/benchmark/bm_vm2_hash_aset_lit.rb new file mode 100644 index 0000000..58339ec --- /dev/null +++ b/benchmark/bm_vm2_hash_aset_lit.rb @@ -0,0 +1,6 @@ +h = {} +i = 0 +while i<6_000_000 # while loop 2 + i += 1 + h["foo"] = nil +end diff --git a/benchmark/bm_vm2_strcat.rb b/benchmark/bm_vm2_strcat.rb new file mode 100644 index 0000000..b25ac6e --- /dev/null +++ 
b/benchmark/bm_vm2_strcat.rb @@ -0,0 +1,7 @@ +i = 0 +str = "" +while i<6_000_000 # benchmark loop 2 + i += 1 + str << "const" + str.clear +end diff --git a/benchmark/bm_vm2_streq1.rb b/benchmark/bm_vm2_streq1.rb new file mode 100644 index 0000000..2a4b0f8 --- /dev/null +++ b/benchmark/bm_vm2_streq1.rb @@ -0,0 +1,6 @@ +i = 0 +foo = "literal" +while i<6_000_000 # benchmark loop 2 + i += 1 + foo == "literal" +end diff --git a/benchmark/bm_vm2_streq2.rb b/benchmark/bm_vm2_streq2.rb new file mode 100644 index 0000000..986020d --- /dev/null +++ b/benchmark/bm_vm2_streq2.rb @@ -0,0 +1,6 @@ +i = 0 +foo = "literal" +while i<6_000_000 # benchmark loop 2 + i += 1 + "literal" == foo +end diff --git a/benchmark/bm_vm2_streqq1.rb b/benchmark/bm_vm2_streqq1.rb new file mode 100644 index 0000000..9183466 --- /dev/null +++ b/benchmark/bm_vm2_streqq1.rb @@ -0,0 +1,6 @@ +i = 0 +foo = "literal" +while i<6_000_000 # benchmark loop 2 + i += 1 + foo === "literal" +end diff --git a/benchmark/bm_vm2_streqq2.rb b/benchmark/bm_vm2_streqq2.rb new file mode 100644 index 0000000..f48a9cd --- /dev/null +++ b/benchmark/bm_vm2_streqq2.rb @@ -0,0 +1,6 @@ +i = 0 +foo = "literal" +while i<6_000_000 # benchmark loop 2 + i += 1 + "literal" === foo +end diff --git a/benchmark/bm_vm2_strfmt.rb b/benchmark/bm_vm2_strfmt.rb new file mode 100644 index 0000000..efb88b6 --- /dev/null +++ b/benchmark/bm_vm2_strfmt.rb @@ -0,0 +1,5 @@ +i = 0 +while i<6_000_000 # benchmark loop 2 + i += 1 + "%d" % i +end diff --git a/benchmark/bm_vm2_strplus1.rb b/benchmark/bm_vm2_strplus1.rb new file mode 100644 index 0000000..714efb8 --- /dev/null +++ b/benchmark/bm_vm2_strplus1.rb @@ -0,0 +1,6 @@ +i = 0 +foo = "a" +while i<6_000_000 # benchmark loop 2 + i += 1 + foo + "b" +end diff --git a/benchmark/bm_vm2_strplus2.rb b/benchmark/bm_vm2_strplus2.rb new file mode 100644 index 0000000..c7f91ed --- /dev/null +++ b/benchmark/bm_vm2_strplus2.rb @@ -0,0 +1,6 @@ +i = 0 +foo = "a" +while i<6_000_000 # benchmark loop 2 + i += 1 + "b" + 
foo +end diff --git a/benchmark/bm_vm2_tr_bang.rb b/benchmark/bm_vm2_tr_bang.rb new file mode 100644 index 0000000..8065a65 --- /dev/null +++ b/benchmark/bm_vm2_tr_bang.rb @@ -0,0 +1,7 @@ +i = 0 +str = "a" +while i<6_000_000 # benchmark loop 2 + i += 1 + str.tr!("a", "A") + str.tr!("A", "a") +end diff --git a/common.mk b/common.mk index ce01aca..0a533a5 100644 --- a/common.mk +++ b/common.mk @@ -639,7 +639,7 @@ PROBES_H_INCLUDES = {$(VPATH)}probes.h VM_CORE_H_INCLUDES = {$(VPATH)}vm_core.h {$(VPATH)}thread_$(THREAD_MODEL).h \ {$(VPATH)}node.h {$(VPATH)}method.h {$(VPATH)}ruby_atomic.h \ {$(VPATH)}vm_debug.h {$(VPATH)}id.h {$(VPATH)}thread_native.h \ - $(CCAN_LIST_INCLUDES) + $(CCAN_LIST_INCLUDES) {$(VPATH)}opt_method.h ### @@ -826,7 +826,7 @@ vm.$(OBJEXT): {$(VPATH)}vm.c {$(VPATH)}gc.h {$(VPATH)}iseq.h \ $(VM_CORE_H_INCLUDES) {$(VPATH)}vm_method.c {$(VPATH)}vm_eval.c \ {$(VPATH)}vm_insnhelper.c {$(VPATH)}vm_insnhelper.h {$(VPATH)}vm_exec.c \ {$(VPATH)}vm_exec.h {$(VPATH)}insns.def {$(VPATH)}vmtc.inc \ - {$(VPATH)}vm.inc {$(VPATH)}insns.inc \ + {$(VPATH)}vm.inc {$(VPATH)}insns.inc {$(VPATH)}opt_method.inc \ {$(VPATH)}internal.h {$(VPATH)}vm.h {$(VPATH)}constant.h \ $(PROBES_H_INCLUDES) {$(VPATH)}probes_helper.h {$(VPATH)}vm_opts.h vm_dump.$(OBJEXT): {$(VPATH)}vm_dump.c $(RUBY_H_INCLUDES) \ @@ -931,6 +931,20 @@ incs: $(INSNS) {$(VPATH)}node_name.inc {$(VPATH)}encdb.h {$(VPATH)}transdb.h {$( insns: $(INSNS) +opt_method.h: $(srcdir)/tool/generic_erb.rb \ + $(srcdir)/template/opt_method.h.tmpl \ + $(srcdir)/defs/opt_method.def + $(ECHO) generating $@ + $(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \ + $(srcdir)/template/opt_method.h.tmpl + +opt_method.inc: $(srcdir)/tool/generic_erb.rb \ + $(srcdir)/template/opt_method.inc.tmpl \ + $(srcdir)/defs/opt_method.def + $(ECHO) generating $@ + $(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \ + $(srcdir)/template/opt_method.inc.tmpl + id.h: $(srcdir)/tool/generic_erb.rb $(srcdir)/template/id.h.tmpl 
$(srcdir)/defs/id.def $(ECHO) generating $@ $(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \ diff --git a/compile.c b/compile.c index 8df7acf..205ff6a 100644 --- a/compile.c +++ b/compile.c @@ -1703,6 +1703,96 @@ get_prev_insn(INSN *iobj) return 0; } +#define new_recvinfo_for_put(iseq,str,mid,klass) \ + new_recvinfo_for_put_(iseq,str,OM_##mid##__##klass) +static VALUE +new_recvinfo_for_put_(rb_iseq_t *iseq, VALUE str, enum ruby_optimized_method om) +{ + VALUE ri = rb_ary_new_from_args(2, str, INT2FIX(om)); + + hide_obj(ri); + iseq_add_mark_object(iseq, ri); + + return ri; +} + +#define new_recvinfo_for_call(iseq,str,mid,klass) \ + new_recvinfo_for_call_((iseq),(str),OM_##mid##__##klass,(mid)) +static VALUE +new_recvinfo_for_call_(rb_iseq_t *iseq, VALUE str, + enum ruby_optimized_method om, ID mid) +{ + VALUE ri = rb_ary_new_from_args(3, str, INT2FIX(om), ID2SYM(mid)); + + hide_obj(ri); + iseq_add_mark_object(iseq, ri); + + return ri; +} + +#define new_recvinfo_for_arg(iseq,str,mid,klass,off) \ + new_recvinfo_for_arg_((iseq),(str),OM_##mid##__##klass,(rb_c##klass),(off)) +static VALUE +new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str, + enum ruby_optimized_method om, VALUE klass, int recv_off) +{ + VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(om), + klass, INT2FIX(recv_off)); + + hide_obj(ri); + iseq_add_mark_object(iseq, ri); + + return ri; +} + +/* + * optimize common calls which take two string literals: + * foo.sub(/../, "to") + * foo.sub!(/../, "to") + * foo.gsub(/../, "to") + * foo.gsub!(/../, "to") + * foo.tr(/../, "to") + * foo.tr!(/../, "to") + * foo.tr_s(/../, "to") + * foo.tr_s!(/../, "to") + */ +static VALUE +opt_str_lit_2(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list) +{ + INSN *piobj; + enum ruby_optimized_method om = OM_LAST_; + + switch (ci->mid) { +#define C(mid) case mid: om = OM_##mid##__String; break + C(idSub); + C(idSub_bang); + C(idGsub); + C(idGsub_bang); + C(idTr); + C(idTr_bang); + C(idTr_s); + C(idTr_s_bang); 
+#undef C + default: return Qfalse; + } + + /* + * previous arg may be a string literal, too: + * foo.gsub!("from", "to") + * foo.tr!("from", "to") + * .. + */ + piobj = (INSN *)get_prev_insn(list); + if (piobj && piobj->insn_id == BIN(putstring)) { + VALUE pstr = piobj->operands[0]; + VALUE pri = new_recvinfo_for_arg_(iseq, pstr, om, rb_cString, 0); + piobj->operands[0] = pri; + piobj->insn_id = BIN(opt_str_lit); + } + + return new_recvinfo_for_arg_(iseq, str, om, rb_cString, 1); +} + static int iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcallopt) { @@ -1819,6 +1909,79 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal } } } + + /* string literal optimizations */ + if (iobj->insn_id == BIN(putstring)) { + INSN *niobj = (INSN *)get_next_insn((INSN *)list); + + if (niobj && niobj->insn_id == BIN(send)) { + rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0]; + + if (!ci->blockiseq && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) { + VALUE ri = Qfalse; + VALUE str = iobj->operands[0]; + + switch (ci->orig_argc) { + case 0: + /* + * optimize: + * "literal".freeze + * "literal".size + * "literal".length + */ + switch (ci->mid) { + case idFreeze: + ri = new_recvinfo_for_call(iseq, str, idFreeze, String); + REMOVE_ELEM((LINK_ELEMENT *)niobj); + break; + case idSize: + ri = new_recvinfo_for_put(iseq, str, idSize, String); + break; + case idLength: + ri = new_recvinfo_for_put(iseq, str, idLength, String); + break; + } + break; + case 1: + switch (ci->mid) { + case idAREF: + /* optimize allocation: obj["lit"] */ + ri = new_recvinfo_for_arg(iseq, str, idAREF, Hash, 0); + break; + case idEq: + /* optimize allocation: obj == "lit" */ + ri = new_recvinfo_for_arg(iseq, str, idEq, String, 0); + break; + case idNeq: + /* optimize allocation: obj != "lit" */ + ri = new_recvinfo_for_arg(iseq, str, idNeq, String, 0); + break; + case idLTLT: + /* optimize allocation: obj << "lit" */ + ri = new_recvinfo_for_arg(iseq, 
str, idLTLT, String, 0); + break; + case idPLUS: + /* optimize allocation: obj + "lit" */ + ri = new_recvinfo_for_arg(iseq, str, idPLUS, String, 0); + break; + case idEqq: + /* optimize allocation: obj === "lit" */ + ri = new_recvinfo_for_arg(iseq, str, idEqq, String, 0); + break; + } + break; + case 2: + ri = opt_str_lit_2(iseq, str, ci, (INSN *)list); + break; + } + if (ri != Qfalse) { + iobj->insn_id = BIN(opt_str_lit); + iobj->operands[0] = ri; + } + } + } + } + return COMPILE_OK; } @@ -3096,6 +3259,20 @@ build_postexe_iseq(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *body) return Qnil; } +static enum ruby_optimized_method +opt_str_lit_recv_om(ID mid) +{ + switch (mid) { + case idEq: return OM_idEq__String; + case idNeq: return OM_idNeq__String; + case idPLUS: return OM_idPLUS__String; + case idMULT: return OM_idMULT__String; + case idMOD: return OM_idMOD__String; + case idEqq: return OM_idEqq__String; + } + return OM_LAST_; +} + /** compile each node @@ -4238,37 +4415,6 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped) break; } case NODE_CALL: - /* optimization shortcut - * "literal".freeze -> opt_str_freeze("literal") - */ - if (node->nd_recv && nd_type(node->nd_recv) == NODE_STR && - node->nd_mid == idFreeze && node->nd_args == NULL) - { - VALUE str = rb_fstring(node->nd_recv->nd_lit); - iseq_add_mark_object(iseq, str); - ADD_INSN1(ret, line, opt_str_freeze, str); - if (poped) { - ADD_INSN(ret, line, pop); - } - break; - } - /* optimization shortcut - * obj["literal"] -> opt_aref_with(obj, "literal") - */ - if (node->nd_mid == idAREF && !private_recv_p(node) && node->nd_args && - nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 1 && - nd_type(node->nd_args->nd_head) == NODE_STR) - { - VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit); - node->nd_args->nd_head->nd_lit = str; - COMPILE(ret, "recv", node->nd_recv); - ADD_INSN2(ret, line, opt_aref_with, - new_callinfo(iseq, idAREF, 1, 0, 0), str); - if (poped) { - 
ADD_INSN(ret, line, pop); - } - break; - } case NODE_FCALL: case NODE_VCALL:{ /* VCALL: variable or call */ /* @@ -4352,7 +4498,30 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped) #endif /* receiver */ if (type == NODE_CALL) { - COMPILE(recv, "recv", node->nd_recv); + enum ruby_optimized_method om; + /* + * optimize: + * "yoda" == other -> opt_str_lit("yoda").send(:==, other) + * "yoda" != other -> opt_str_lit("yoda").send(:!=, other) + * "str" + other -> opt_str_lit("str").send(:+, other) + * "str" * other -> opt_str_lit("str").send(:*, other) + * "fmt" % args -> opt_str_lit("fmt").send(:%, args) + */ + if (iseq->compile_data->option->peephole_optimization && + ((om = opt_str_lit_recv_om(mid)) != OM_LAST_) && + !private_recv_p(node) && + node->nd_recv && nd_type(node->nd_recv) == NODE_STR && + node->nd_args && nd_type(node->nd_args) == NODE_ARRAY && + node->nd_args->nd_alen == 1) + { + VALUE yoda = rb_fstring(node->nd_recv->nd_lit); + VALUE recv_info = new_recvinfo_for_put_(iseq, yoda, om); + + node->nd_recv->nd_lit = yoda; + ADD_INSN1(recv, line, opt_str_lit, recv_info); + } else { + COMPILE(recv, "recv", node->nd_recv); + } } else if (type == NODE_FCALL || type == NODE_VCALL) { ADD_CALL_RECEIVER(recv, line); @@ -5241,23 +5410,31 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped) int asgnflag; /* optimization shortcut - * obj["literal"] = value -> opt_aset_with(obj, "literal", value) + * obj["literal"] = val -> send(obj, :[]=, opt_str_lit("lit"), val) + * TODO: ideally this should be done inside iseq_peephole_optimize, + * but that would require a lot of scanning as the `val' (2nd arg) + * is of variable distance between the :putstring and :send insns */ - if (node->nd_mid == idASET && !private_recv_p(node) && node->nd_args && + if (iseq->compile_data->option->peephole_optimization && + node->nd_mid == idASET && !private_recv_p(node) && node->nd_args && nd_type(node->nd_args) == NODE_ARRAY && 
node->nd_args->nd_alen == 2 && nd_type(node->nd_args->nd_head) == NODE_STR) { VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit); + VALUE recv_info = new_recvinfo_for_arg(iseq, str, idASET, Hash, 0); + node->nd_args->nd_head->nd_lit = str; - iseq_add_mark_object(iseq, str); + if (!poped) { + ADD_INSN(ret, line, putnil); + } COMPILE(ret, "recv", node->nd_recv); + ADD_INSN1(ret, line, opt_str_lit, recv_info); COMPILE(ret, "value", node->nd_args->nd_next->nd_head); if (!poped) { - ADD_INSN(ret, line, swap); - ADD_INSN1(ret, line, topn, INT2FIX(1)); + ADD_INSN1(ret, line, setn, INT2FIX(3)); } - ADD_INSN2(ret, line, opt_aset_with, - new_callinfo(iseq, idASET, 2, 0, 0), str); + flag = VM_CALL_ARGS_SKIP_SETUP; + ADD_SEND_R(ret, line, node->nd_mid, 2, 0, INT2FIX(flag)); ADD_INSN(ret, line, pop); break; } diff --git a/defs/id.def b/defs/id.def index f7fffbd..21aff93 100644 --- a/defs/id.def +++ b/defs/id.def @@ -57,6 +57,14 @@ firstline, predefined = __LINE__+1, %[\ core#hash_merge_ary core#hash_merge_ptr core#hash_merge_kwd + gsub + gsub! + sub + sub! + tr + tr! + tr_s + tr_s! ] class KeywordError < RuntimeError @@ -83,6 +91,7 @@ predefined.split(/^/).each_with_index do |line, num| token = "_#{token.gsub(/\W+/, '_')}" else token = token.sub(/\?/, 'P').sub(/\A[a-z]/) {$&.upcase} + token.sub!(/!\z/, "_bang") token.sub!(/\A\$/, "_G_") token.sub!(/\A@@/, "_C_") token.sub!(/\A@/, "_I_") diff --git a/defs/opt_method.def b/defs/opt_method.def new file mode 100644 index 0000000..e96cc9b --- /dev/null +++ b/defs/opt_method.def @@ -0,0 +1,57 @@ +# byte align the bitmap for now, maybe some arches do better with long or int +# we may also use a larger size (in the unlikely case) we need more than +# 7 optimized classes per mid. 
Currently this caps us to 256 optimized +# (mid, klass) combinations (tested with OM_SHIFT=4, giving us 64K) +OM_SHIFT = 3 +OM_ALIGN = 1 << OM_SHIFT +OM_ALIGN_MASK = ~(OM_ALIGN - 1) +OPT_METHODS = [ + %w(idPLUS Fixnum Float String Array), + %w(idMINUS Fixnum Float), + %w(idMULT Fixnum Float String), + %w(idDIV Fixnum Float), + %w(idMOD Fixnum Float String), + %w(idEq Fixnum Float String), + %w(idNeq Fixnum Float String), + # id, mask classes + [ 'idEqq', %w(Bignum Fixnum Float Symbol), *%w(String) ], + %w(idLT Fixnum Float), + %w(idLE Fixnum Float), + %w(idGT Fixnum Float), + %w(idGE Fixnum Float), + %w(idLTLT String Array), + %w(idAREF Array Hash), + %w(idASET Array Hash), + %w(idLength Array String Hash), + %w(idSize Array String Hash), + %w(idEmptyP Array String Hash), + %w(idSucc Fixnum String Time), + %w(idEqTilde Regexp String), + %w(idFreeze String), + %w(idGsub String), + %w(idGsub_bang String), + %w(idSub String), + %w(idSub_bang String), + %w(idTr String), + %w(idTr_bang String), + %w(idTr_s String), + %w(idTr_s_bang String), +] + +# for checking optimized classes, +# speeds up method definitions of non-core classes +def opt_classes + rv = {} + OPT_METHODS.each do |(_, *classes)| + classes.flatten.each { |c| rv[c] = true } + end + rv +end + +def om(mid, klass) + if Array === klass + "OM_#{mid}__#{klass.join('_')}" + else + "OM_#{mid}__#{klass}" + end +end diff --git a/insns.def b/insns.def index bfa11a9..e304338 100644 --- a/insns.def +++ b/insns.def @@ -356,6 +356,61 @@ putstring /** @c put + @e put string val. 
string may be created depending on recv_info conditions + */ +DEFINE_INSN +opt_str_lit +(VALUE recv_info) +() +(VALUE val) +{ + /* + * recv_info: + * 0 - str + * 1 - optimized method flag (OM_*) + * optional: + * 2 - Class (optimized receiver class) or Symbol (method name) + * 3 - stack offset (Fixnum), only present if [2] is a Class, + * -1 stack offset means receiver is the frozen string literal itself + */ + const VALUE *ri = RARRAY_CONST_PTR(recv_info); + long len = RARRAY_LEN(recv_info); + enum ruby_optimized_method om = FIX2INT(ri[1]); + + val = ri[0]; /* hopefully, this is the only val assignment we need */ + if (len > 2) { + VALUE msym_or_class = ri[2]; + + /* check if the receiver is an on-stack object: */ + if (!SYMBOL_P(msym_or_class)) { + int n = FIX2INT(ri[3]); + VALUE recv = n < 0 ? val : TOPN(n); + + if (SPECIAL_CONST_P(recv) || + RBASIC_CLASS(recv) != msym_or_class || + !rb_basic_op_unredefined_p(om)) { + /* bad, somebody redefined an optimized method, slow path: */ + val = rb_str_resurrect(val); + } + } + else { /* receiver is the string literal itself (e.g. 
"str".freeze) */ + if (!rb_basic_op_unredefined_p(om)) { + /* bad, somebody redefined an optimized method, slow path: */ + val = rb_str_resurrect(val); + val = rb_funcall(val, SYM2ID(msym_or_class), 0); + } + } + } + else { /* string lit is receiver, but there are args */ + if (!rb_basic_op_unredefined_p(om)) { + /* bad, somebody redefined an optimized method, slow path: */ + val = rb_str_resurrect(val); + } + } +} + +/** + @c put @e put concatenate strings @j スタックトップの文字列を n 個連結し,結果をスタックにプッシュする。 */ @@ -999,20 +1054,6 @@ send CALL_METHOD(ci); } -DEFINE_INSN -opt_str_freeze -(VALUE str) -() -(VALUE val) -{ - if (BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG)) { - val = str; - } - else { - val = rb_funcall(rb_str_resurrect(str), idFreeze, 0); - } -} - /** @c optimize @e Invoke method without block, splat @@ -1285,11 +1326,7 @@ opt_case_dispatch case T_FIXNUM: case T_BIGNUM: case T_STRING: - if (BASIC_OP_UNREDEFINED_P(BOP_EQQ, - SYMBOL_REDEFINED_OP_FLAG | - FIXNUM_REDEFINED_OP_FLAG | - BIGNUM_REDEFINED_OP_FLAG | - STRING_REDEFINED_OP_FLAG)) { + if (rb_basic_mask_unredefined_p(OM_idEqq__Bignum_Fixnum_Float_Symbol)) { st_data_t val; if (st_lookup(RHASH_TBL_RAW(hash), key, &val)) { JUMP(FIX2INT((VALUE)val)); @@ -1317,8 +1354,7 @@ opt_plus (VALUE recv, VALUE obj) (VALUE val) { - if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_PLUS,FIXNUM_REDEFINED_OP_FLAG)) { + if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idPLUS, Fixnum)) { /* fixnum + fixnum */ #ifndef LONG_LONG_VALUE val = (recv + (obj & (~1))); @@ -1341,20 +1377,20 @@ opt_plus #endif } else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idPLUS, Float)) { val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); } else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { + 
BASIC_OP_UNREDEFINED_P(idPLUS, Float)) { val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); } else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idPLUS, String)) { val = rb_str_plus(recv, obj); } else if (RBASIC_CLASS(recv) == rb_cArray && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idPLUS, Array)) { val = rb_ary_plus(recv, obj); } else { @@ -1381,7 +1417,7 @@ opt_minus (VALUE val) { if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_MINUS, FIXNUM_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idMINUS, Fixnum)) { long a, b, c; a = FIX2LONG(recv); @@ -1396,12 +1432,12 @@ opt_minus } } else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idMINUS, Float)) { val = DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj)); } else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && - BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idMINUS, Float)) { val = DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj)); } else { @@ -1429,7 +1465,7 @@ opt_mult (VALUE val) { if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_MULT, FIXNUM_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idMULT, Fixnum)) { long a, b; a = FIX2LONG(recv); @@ -1446,13 +1482,12 @@ opt_mult } } } - else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) { + else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMULT, Float)) { val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj)); } else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && - BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idMULT, Float)) { val = 
DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj)); } else { @@ -1478,8 +1513,7 @@ opt_div (VALUE recv, VALUE obj) (VALUE val) { - if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_DIV, FIXNUM_REDEFINED_OP_FLAG)) { + if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idDIV, Fixnum)) { long x, y, div; x = FIX2LONG(recv); @@ -1509,13 +1543,12 @@ opt_div } val = LONG2NUM(div); } - else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) { + else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idDIV, Float)) { val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj)); } else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && - BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idDIV, Float)) { val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj)); } else { @@ -1541,8 +1574,7 @@ opt_mod (VALUE recv, VALUE obj) (VALUE val) { - if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_MOD, FIXNUM_REDEFINED_OP_FLAG )) { + if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMOD, Fixnum )) { long x, y; x = FIX2LONG(recv); @@ -1576,13 +1608,12 @@ opt_mod val = LONG2FIX(mod); } } - else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) { + else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMOD, Float)) { val = DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj))); } else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && - BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idMOD, Float)) { val = DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj))); } else { @@ -1661,7 +1692,7 @@ opt_lt (VALUE val) { if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_LT, FIXNUM_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idLT, Fixnum)) { SIGNED_VALUE a = 
recv, b = obj; if (a < b) { @@ -1672,13 +1703,13 @@ opt_lt } } else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idLT, Float)) { /* flonum is not NaN */ val = RFLOAT_VALUE(recv) < RFLOAT_VALUE(obj) ? Qtrue : Qfalse; } else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && - BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idLT, Float)) { val = double_cmp_lt(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)); } else { @@ -1705,7 +1736,7 @@ opt_le (VALUE val) { if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_LE, FIXNUM_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idLE, Fixnum)) { SIGNED_VALUE a = recv, b = obj; if (a <= b) { @@ -1716,7 +1747,7 @@ opt_le } } else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_LE, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idLE, Float)) { /* flonum is not NaN */ val = RFLOAT_VALUE(recv) <= RFLOAT_VALUE(obj) ? Qtrue : Qfalse; } @@ -1740,7 +1771,7 @@ opt_gt (VALUE val) { if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_GT, FIXNUM_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idGT, Fixnum)) { SIGNED_VALUE a = recv, b = obj; if (a > b) { @@ -1751,13 +1782,13 @@ opt_gt } } else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idGT, Float)) { /* flonum is not NaN */ val = RFLOAT_VALUE(recv) > RFLOAT_VALUE(obj) ? 
Qtrue : Qfalse; } else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && - BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idGT, Float)) { val = double_cmp_gt(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)); } else { @@ -1784,7 +1815,7 @@ opt_ge (VALUE val) { if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_GE, FIXNUM_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idGE, Fixnum)) { SIGNED_VALUE a = recv, b = obj; if (a >= b) { @@ -1794,8 +1825,7 @@ opt_ge val = Qfalse; } } - else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_GE, FLOAT_REDEFINED_OP_FLAG)) { + else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idGE, Float)) { /* flonum is not NaN */ val = RFLOAT_VALUE(recv) >= RFLOAT_VALUE(obj) ? Qtrue : Qfalse; } @@ -1819,11 +1849,11 @@ opt_ltlt { if (!SPECIAL_CONST_P(recv)) { if (RBASIC_CLASS(recv) == rb_cString && - BASIC_OP_UNREDEFINED_P(BOP_LTLT, STRING_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idLTLT, String)) { val = rb_str_concat(recv, obj); } else if (RBASIC_CLASS(recv) == rb_cArray && - BASIC_OP_UNREDEFINED_P(BOP_LTLT, ARRAY_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idLTLT, Array)) { val = rb_ary_push(recv, obj); } else { @@ -1850,10 +1880,10 @@ opt_aref (VALUE val) { if (!SPECIAL_CONST_P(recv)) { - if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(BOP_AREF, ARRAY_REDEFINED_OP_FLAG) && FIXNUM_P(obj)) { + if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(idAREF, Array) && FIXNUM_P(obj)) { val = rb_ary_entry(recv, FIX2LONG(obj)); } - else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) { + else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(idAREF, Hash)) { val = rb_hash_aref(recv, obj); } else { @@ -1880,11 +1910,11 @@ opt_aset (VALUE val) { if (!SPECIAL_CONST_P(recv)) { - if (RBASIC_CLASS(recv) == rb_cArray && 
BASIC_OP_UNREDEFINED_P(BOP_ASET, ARRAY_REDEFINED_OP_FLAG) && FIXNUM_P(obj)) { + if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(idASET, Array) && FIXNUM_P(obj)) { rb_ary_store(recv, FIX2LONG(obj), set); val = set; } - else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) { + else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(idASET, Hash)) { rb_hash_aset(recv, obj, set); val = set; } @@ -1903,49 +1933,6 @@ opt_aset /** @c optimize - @e recv[str] = set - @j 最適化された recv[str] = set。 - */ -DEFINE_INSN -opt_aset_with -(CALL_INFO ci, VALUE key) -(VALUE recv, VALUE val) -(VALUE val) -{ - if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) { - rb_hash_aset(recv, key, val); - } - else { - PUSH(recv); - PUSH(rb_str_resurrect(key)); - PUSH(val); - CALL_SIMPLE_METHOD(recv); - } -} - -/** - @c optimize - @e recv[str] - @j 最適化された recv[str]。 - */ -DEFINE_INSN -opt_aref_with -(CALL_INFO ci, VALUE key) -(VALUE recv) -(VALUE val) -{ - if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) { - val = rb_hash_aref(recv, key); - } - else { - PUSH(recv); - PUSH(rb_str_resurrect(key)); - CALL_SIMPLE_METHOD(recv); - } -} - -/** - @c optimize @e optimized length @j 最適化された recv.length()。 */ @@ -1957,15 +1944,15 @@ opt_length { if (!SPECIAL_CONST_P(recv)) { if (RBASIC_CLASS(recv) == rb_cString && - BASIC_OP_UNREDEFINED_P(BOP_LENGTH, STRING_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idLength, String)) { val = rb_str_length(recv); } else if (RBASIC_CLASS(recv) == rb_cArray && - BASIC_OP_UNREDEFINED_P(BOP_LENGTH, ARRAY_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idLength, Array)) { val = LONG2NUM(RARRAY_LEN(recv)); } else if (RBASIC_CLASS(recv) == rb_cHash && - BASIC_OP_UNREDEFINED_P(BOP_LENGTH, HASH_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idLength, Hash)) { val = 
INT2FIX(RHASH_SIZE(recv)); } else { @@ -1992,15 +1979,15 @@ opt_size { if (!SPECIAL_CONST_P(recv)) { if (RBASIC_CLASS(recv) == rb_cString && - BASIC_OP_UNREDEFINED_P(BOP_SIZE, STRING_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idSize, String)) { val = rb_str_length(recv); } else if (RBASIC_CLASS(recv) == rb_cArray && - BASIC_OP_UNREDEFINED_P(BOP_SIZE, ARRAY_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idSize, Array)) { val = LONG2NUM(RARRAY_LEN(recv)); } else if (RBASIC_CLASS(recv) == rb_cHash && - BASIC_OP_UNREDEFINED_P(BOP_SIZE, HASH_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idSize, Hash)) { val = INT2FIX(RHASH_SIZE(recv)); } else { @@ -2027,17 +2014,17 @@ opt_empty_p { if (!SPECIAL_CONST_P(recv)) { if (RBASIC_CLASS(recv) == rb_cString && - BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, STRING_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idEmptyP, String)) { if (RSTRING_LEN(recv) == 0) val = Qtrue; else val = Qfalse; } else if (RBASIC_CLASS(recv) == rb_cArray && - BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, ARRAY_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idEmptyP, Array)) { if (RARRAY_LEN(recv) == 0) val = Qtrue; else val = Qfalse; } else if (RBASIC_CLASS(recv) == rb_cHash && - BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, HASH_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idEmptyP, Hash)) { if (RHASH_EMPTY_P(recv)) val = Qtrue; else val = Qfalse; } @@ -2065,7 +2052,7 @@ opt_succ { if (SPECIAL_CONST_P(recv)) { if (FIXNUM_P(recv) && - BASIC_OP_UNREDEFINED_P(BOP_SUCC, FIXNUM_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idSucc, Fixnum)) { const VALUE obj = INT2FIX(1); /* fixnum + INT2FIX(1) */ val = (recv + (obj & (~1))); @@ -2080,11 +2067,11 @@ opt_succ } else { if (RBASIC_CLASS(recv) == rb_cString && - BASIC_OP_UNREDEFINED_P(BOP_SUCC, STRING_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idSucc, String)) { val = rb_str_succ(recv); } else if (RBASIC_CLASS(recv) == rb_cTime && - BASIC_OP_UNREDEFINED_P(BOP_SUCC, TIME_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idSucc, 
Time)) { val = rb_time_succ(recv); } else @@ -2134,7 +2121,7 @@ opt_regexpmatch1 (VALUE obj) (VALUE val) { - if (BASIC_OP_UNREDEFINED_P(BOP_MATCH, REGEXP_REDEFINED_OP_FLAG)) { + if (BASIC_OP_UNREDEFINED_P(idEqTilde, Regexp)) { val = rb_reg_match(r, obj); } else { @@ -2154,7 +2141,7 @@ opt_regexpmatch2 (VALUE val) { if (CLASS_OF(obj2) == rb_cString && - BASIC_OP_UNREDEFINED_P(BOP_MATCH, STRING_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idEqTilde, String)) { val = rb_reg_match(obj1, obj2); } else { diff --git a/template/opt_method.h.tmpl b/template/opt_method.h.tmpl new file mode 100644 index 0000000..39c4043 --- /dev/null +++ b/template/opt_method.h.tmpl @@ -0,0 +1,71 @@ +/* DO NOT EDIT THIS FILE DIRECTLY: edit template/opt_method.h.tmpl instead */ +#ifndef RUBY_OPT_METHOD_H +#define RUBY_OPT_METHOD_H +<% +defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def") +eval(File.read(defs), binding, defs) +%> +typedef uint<%= OM_ALIGN %>_t rb_om_bitmap_t; + +enum ruby_optimized_method { +<% +opt_masks = {} +n = 0 +OPT_METHODS.each do |(mid, *classes)| + classes.each do |klass| + if Array === klass + opt_masks[mid] = klass.dup + # we will align these in the second loop, below + next + end %> + <%= om(mid, klass) %> = <%= n += 1 %>, +<% + end # classes.each +end # OPT_METHODS.each + +# align multi-class bits so a single AND operation may +# be byte-aligned and used to check an mid for up to 7 classes at once: +opt_masks.each do |mid, classes| + # round up n to the next aligned byte slot + n = (n + OM_ALIGN) & OM_ALIGN_MASK + + classes.each do |k| +%> + <%= om(mid, k) %> = <%= n += 1 %>, +<%= +# we need this macro to generate shifts for the masks enums below: +"#define #{om(mid, k)} (#{n})" +%> +<% + end # classes.each +end # opt_masks.each +if n >= ((1 << OM_ALIGN) - 1) + raise "OM_ALIGN needs to be raised to support more optimized methods" +end +%> + OM_LAST_ = <%= om_last = (n += 1) %>, /* for bitmap sizing */ + /* special mask values below */ 
+<% +# generate mask enums +opt_masks.each do |mid, c| + # n.b.: negate masks to simplify the rb_opt_method_is_mask check: +%> + <%= om(mid, c) %> = -(<%= + # pack into 16 bits so it may be a negative Fixnum + # 1) 8 bit offset + # 2) OM_ALIGN bit mask (8 or 16) + sep = "|\n " + "/* offset: */ ((#{om(mid, c[0])} / #{OM_ALIGN}) << #{OM_ALIGN}) " \ + "#{sep} /* mask: */ (" + + c.map { |k| "(1U << (#{om(mid, k)} % #{OM_ALIGN}))" }.join(sep) + # mask + ')' + %>), +<% +end # opt_masks.each +%> + OM_ALIGN_ = <%= OM_ALIGN %>, + OM_SIZE_ = <%= ((om_last + OM_ALIGN) & OM_ALIGN_MASK) / OM_ALIGN %>, + OM_GETMASK_ = (1 << OM_ALIGN_) - 1 +}; + +#endif /* RUBY_OPT_METHOD_H */ diff --git a/template/opt_method.inc.tmpl b/template/opt_method.inc.tmpl new file mode 100644 index 0000000..0501121 --- /dev/null +++ b/template/opt_method.inc.tmpl @@ -0,0 +1,49 @@ +/* DO NOT EDIT THIS FILE DIRECTLY: edit template/opt_method.inc.tmpl instead */ +<% +defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def") +eval(File.read(defs), binding, defs) +%> + +static void +add_opt_method(st_table *tbl, VALUE klass, ID mid, + enum ruby_optimized_method om) +{ + rb_method_entry_t *me = rb_method_entry_at(klass, mid); + + if (me && me->def && me->def->type == VM_METHOD_TYPE_CFUNC) { + st_insert(tbl, (st_data_t)me, (st_data_t)om); + } + else if (mid != idNeq) { + rb_bug("undefined optimized method: %s", rb_id2name(mid)); + } +} + +static void +vm_init_redefined_flags(void *tbl) +{ +<% +OPT_METHODS.each do |(mid, *classes)| + classes.each do |klass| + if Array === klass + klass.each do |k| +%> + add_opt_method(tbl, rb_c<%= k %>, <%= mid %>, <%= om(mid, k) %>); +<% + end # klass.each + else +%> + add_opt_method(tbl, rb_c<%= klass %>, <%= mid %>, <%= om(mid, klass) %>); +<% end # !(Array === klass) + end # classes.each +end # OPT_METHODS.each +%> +} + +static int +vm_redefinition_check_flag(VALUE klass) +{ +<% opt_classes.each_key do |klass| %> + if (klass == rb_c<%= klass %>) 
return 1; +<% end %> + return 0; +} diff --git a/test/-ext-/symbol/test_type.rb b/test/-ext-/symbol/test_type.rb index f1749f5..5bd79b8 100644 --- a/test/-ext-/symbol/test_type.rb +++ b/test/-ext-/symbol/test_type.rb @@ -4,6 +4,7 @@ require "-test-/symbol" module Test_Symbol class TestType < Test::Unit::TestCase def test_id2str_fstring_bug9171 + require_compile_option(:peephole_optimization) fstr = eval("# encoding: us-ascii 'foobar'.freeze") assert_same fstr, Bug::Symbol.id2str(:foobar) diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb index 8a5ed34..faacf48 100644 --- a/test/objspace/test_objspace.rb +++ b/test/objspace/test_objspace.rb @@ -195,6 +195,7 @@ class TestObjSpace < Test::Unit::TestCase end def test_dump_flags + require_compile_option(:peephole_optimization) info = ObjectSpace.dump("foo".freeze) assert_match /"wb_protected":true, "old":true, "long_lived":true, "marked":true/, info assert_match /"fstring":true/, info diff --git a/test/ruby/envutil.rb b/test/ruby/envutil.rb index 81b982c..e844822 100644 --- a/test/ruby/envutil.rb +++ b/test/ruby/envutil.rb @@ -477,6 +477,16 @@ eom AssertFile end + def require_compile_option(opt) + case RubyVM::InstructionSequence.compile_option[opt] + when true + when false + skip(":#{opt} disabled") + else + raise ArgumentError, "unrecognized compile option: #{opt.inspect}" + end + end + class << (AssertFile = Struct.new(:failure_message).new) include Assertions def assert_file_predicate(predicate, *args) diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index 4431552..bb7e8b5 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -216,6 +216,7 @@ class TestHash < Test::Unit::TestCase end def test_AREF_fstring_key + require_compile_option(:peephole_optimization) h = {"abc" => 1} before = GC.stat(:total_allocated_objects) 5.times{ h["abc"] } @@ -230,6 +231,7 @@ class TestHash < Test::Unit::TestCase end def test_NEWHASH_fstring_key + 
require_compile_option(:peephole_optimization) a = {"ABC" => :t} b = {"ABC" => :t} assert_same a.keys[0], b.keys[0] diff --git a/test/ruby/test_iseq.rb b/test/ruby/test_iseq.rb index 94a814c..ac1c417 100644 --- a/test/ruby/test_iseq.rb +++ b/test/ruby/test_iseq.rb @@ -118,6 +118,7 @@ class TestISeq < Test::Unit::TestCase end def test_label_fstring + require_compile_option(:peephole_optimization) c = Class.new{ def foobar() end } a, b = eval("# encoding: us-ascii\n'foobar'.freeze"), diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index d82d2bc..8d46764 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -1908,6 +1908,13 @@ class TestString < Test::Unit::TestCase } end + def test_literal_freeze + require_compile_option(:peephole_optimization) + before = GC.stat(:total_allocated_objects) + 5.times { "".freeze } + assert_equal before, GC.stat(:total_allocated_objects) + end + class S2 < String end def test_str_new4 @@ -2272,6 +2279,192 @@ class TestString < Test::Unit::TestCase end; end if [0].pack("l!").bytesize < [nil].pack("p").bytesize # enable only when string size range is smaller than memory space + + def test_opt_strcat_with + assert_separately([], <<-RUBY) + class String + undef << + def <<(str) + "overridden" + end + end + assert_equal("overridden", "" << "foo") + foo = "foo" + assert_equal("overridden", foo << "bar") + RUBY + + if @cls == String + nr = 10 + recv = "" + before = GC.stat(:total_allocated_objects) + nr.times { recv << "constant" } + assert_equal before, GC.stat(:total_allocated_objects) + assert_equal "constant" * nr, recv + + before = GC.stat(:total_allocated_objects) + nr.times { "recv" << "constant" } + assert_equal before + nr, GC.stat(:total_allocated_objects) + end + end + + def test_opt_str_lit + assert_separately([], <<-RUBY) + class String + undef == + def ==(str) + :TROO + end + end + foo = "foo" + assert_equal(:TROO, (foo == "foo"), 'string == "peephole 2nd pass"') + assert_equal(:TROO, ("foo" == 
foo), '"yoda 1st pass" == string') + RUBY + + assert_separately([], <<-RUBY) + class String + undef != + def !=(str) + :NOT + end + end + foo = "" + assert_equal(:NOT, ("foo" != foo), '"yoda 1st pass" != string') + assert_equal(:NOT, (foo != "foo"), 'string != "peephole 2nd pass"') + RUBY + + assert_separately([], <<-RUBY) + class String + undef size + undef length + def size + 42 + end + def length + 42 + end + end + assert_equal(42, "".size, 'lit string size') + assert_equal(42, "".length, 'lit string size') + RUBY + + assert_separately([], <<-RUBY) + class String + undef + + def +(other) + :plus + end + end + foo = "a" + assert_equal(:plus, "" + foo, 'lit plus') + assert_equal(:plus, foo + "", 'plus lit') + RUBY + + assert_separately([], <<-RUBY) + class String + undef * + def *(other) + :mult + end + end + assert_equal(:mult, "x" * 3, 'lit mult') + RUBY + + assert_separately([], <<-RUBY) + class String + undef === + def ===(other) + other + end + end + str = "y" + assert_equal(false, "x" === false, 'lit threequal') + assert_equal("x", str === "x", 'threequal lit') + RUBY + + if @cls == String + nr = 10 + + recv = "something" + res = [] + before = GC.stat(:total_allocated_objects) + nr.times { res << (recv == "constant") } # opt_streq1 + nr.times { res << ("constant" == recv) } # opt_streq2 + nr.times { res << ("something" != recv) } # 1st pass peephole + nr.times { res << ("constant" == recv) } # opt_streq2 + nr.times { res << ("constant" === recv) } # opt_streqq2 + nr.times { res << (recv != "something") } # 2nd pass peephole + assert_equal before, GC.stat(:total_allocated_objects) + assert_equal [ false ], res.uniq! 
+ + res.clear + before = GC.stat(:total_allocated_objects) + nr.times { res << (recv == "something") } # opt_streq1 + nr.times { res << ("something" == recv) } # opt_streq2 + nr.times { res << ("something" === recv) } # opt_streqq2 + nr.times { res << (recv === "something") } # opt_streqq2 + nr.times { res << ("constant" != recv) } # 1st pass peephole + nr.times { res << (recv != "constant") } # 2nd pass peephole + nr.times { res << ("a" != "b") } # 1st pass peephole + nr.times { res << ("a" == "a") } # 1st pass peephole + nr.times { res << ("".size == 0) } # 2nd pass peephole + nr.times { res << ("".length == 0) } # 2nd pass peephole + assert_equal before, GC.stat(:total_allocated_objects) + assert_equal [ true ], res.uniq! + + # :+ optimizations + res.clear + before = GC.stat(:total_allocated_objects) + nr.times { res << ("foo" + recv) } + assert_equal before + nr, GC.stat(:total_allocated_objects) + assert_equal [ "foosomething" ], res.uniq! + + res.clear + before = GC.stat(:total_allocated_objects) + nr.times { res << (recv + "foo") } + assert_equal before + nr, GC.stat(:total_allocated_objects) + assert_equal [ "somethingfoo" ], res.uniq! + + res.clear + before = GC.stat(:total_allocated_objects) + nr.times { res << ('a' * 3) } + assert_equal before + nr, GC.stat(:total_allocated_objects) + assert_equal [ "aaa" ], res.uniq! 
+ end + end + + def assert_no_new_allocations(mesg = "", adjust = 0) + before = GC.stat(:total_allocated_objects) + yield + after = GC.stat(:total_allocated_objects) + assert_equal before, after - adjust, mesg + end + + def test_opt_str_lit_gsub + return if @cls != String + require_compile_option(:peephole_optimization) + foo = "foo" + re = /nomatch/ + foo.gsub!(re, "00") # compile regexp + n = 3 + + assert_no_new_allocations("gsub var regexp") do + n.times { foo.gsub!(re, "00") } + end + + # compiles re once: + assert_no_new_allocations("gsub lit regexp", 1) do + n.times { foo.gsub!(/nomatch/, "00") } + end + + assert_no_new_allocations("gsub literal string") do + n.times { foo.gsub!("nomatch", "00") } + end + + ary = [ [ re ] ] + assert_no_new_allocations("bigger stack") do + n.times { foo.gsub!(ary[0][0], "00") } + end + end end class TestString2 < TestString diff --git a/vm.c b/vm.c index 73adea4..4de83ae 100644 --- a/vm.c +++ b/vm.c @@ -20,6 +20,7 @@ #include "eval_intern.h" #include "probes.h" #include "probes_helper.h" +#include "opt_method.inc" static inline VALUE * VM_EP_LEP(VALUE *ep) @@ -1134,30 +1135,16 @@ rb_iter_break_value(VALUE val) static st_table *vm_opt_method_table = 0; -static int -vm_redefinition_check_flag(VALUE klass) -{ - if (klass == rb_cFixnum) return FIXNUM_REDEFINED_OP_FLAG; - if (klass == rb_cFloat) return FLOAT_REDEFINED_OP_FLAG; - if (klass == rb_cString) return STRING_REDEFINED_OP_FLAG; - if (klass == rb_cArray) return ARRAY_REDEFINED_OP_FLAG; - if (klass == rb_cHash) return HASH_REDEFINED_OP_FLAG; - if (klass == rb_cBignum) return BIGNUM_REDEFINED_OP_FLAG; - if (klass == rb_cSymbol) return SYMBOL_REDEFINED_OP_FLAG; - if (klass == rb_cTime) return TIME_REDEFINED_OP_FLAG; - if (klass == rb_cRegexp) return REGEXP_REDEFINED_OP_FLAG; - return 0; -} - static void rb_vm_check_redefinition_opt_method(const rb_method_entry_t *me, VALUE klass) { - st_data_t bop; + st_data_t om; if (!me->def || me->def->type == VM_METHOD_TYPE_CFUNC) { - if 
(st_lookup(vm_opt_method_table, (st_data_t)me, &bop)) { - int flag = vm_redefinition_check_flag(klass); + if (st_lookup(vm_opt_method_table, (st_data_t)me, &om)) { + unsigned int i = om / OM_ALIGN_; + rb_om_bitmap_t mask = (rb_om_bitmap_t)(1U << (om % OM_ALIGN_)); - ruby_vm_redefined_flag[bop] |= flag; + ruby_vm_redefined_flag[i] |= mask; } } } @@ -1184,51 +1171,11 @@ rb_vm_check_redefinition_by_prepend(VALUE klass) } static void -add_opt_method(VALUE klass, ID mid, VALUE bop) -{ - rb_method_entry_t *me = rb_method_entry_at(klass, mid); - - if (me && me->def && - me->def->type == VM_METHOD_TYPE_CFUNC) { - st_insert(vm_opt_method_table, (st_data_t)me, (st_data_t)bop); - } - else { - rb_bug("undefined optimized method: %s", rb_id2name(mid)); - } -} - -static void vm_init_redefined_flag(void) { - ID mid; - VALUE bop; - vm_opt_method_table = st_init_numtable(); -#define OP(mid_, bop_) (mid = id##mid_, bop = BOP_##bop_, ruby_vm_redefined_flag[bop] = 0) -#define C(k) add_opt_method(rb_c##k, mid, bop) - OP(PLUS, PLUS), (C(Fixnum), C(Float), C(String), C(Array)); - OP(MINUS, MINUS), (C(Fixnum), C(Float)); - OP(MULT, MULT), (C(Fixnum), C(Float)); - OP(DIV, DIV), (C(Fixnum), C(Float)); - OP(MOD, MOD), (C(Fixnum), C(Float)); - OP(Eq, EQ), (C(Fixnum), C(Float), C(String)); - OP(Eqq, EQQ), (C(Fixnum), C(Bignum), C(Float), C(Symbol), C(String)); - OP(LT, LT), (C(Fixnum), C(Float)); - OP(LE, LE), (C(Fixnum), C(Float)); - OP(GT, GT), (C(Fixnum), C(Float)); - OP(GE, GE), (C(Fixnum), C(Float)); - OP(LTLT, LTLT), (C(String), C(Array)); - OP(AREF, AREF), (C(Array), C(Hash)); - OP(ASET, ASET), (C(Array), C(Hash)); - OP(Length, LENGTH), (C(Array), C(String), C(Hash)); - OP(Size, SIZE), (C(Array), C(String), C(Hash)); - OP(EmptyP, EMPTY_P), (C(Array), C(String), C(Hash)); - OP(Succ, SUCC), (C(Fixnum), C(String), C(Time)); - OP(EqTilde, MATCH), (C(Regexp), C(String)); - OP(Freeze, FREEZE), (C(String)); -#undef C -#undef OP + vm_init_redefined_flags(vm_opt_method_table); /* 
opt_method.inc.tmpl */ } /* for vm development */ diff --git a/vm_core.h b/vm_core.h index 3f1ddc8..10281ef 100644 --- a/vm_core.h +++ b/vm_core.h @@ -24,6 +24,7 @@ #include "method.h" #include "ruby_atomic.h" #include "ccan/list/list.h" +#include "opt_method.h" #include "ruby/thread_native.h" #if defined(_WIN32) @@ -320,33 +321,6 @@ enum ruby_special_exceptions { ruby_special_error_count }; -enum ruby_basic_operators { - BOP_PLUS, - BOP_MINUS, - BOP_MULT, - BOP_DIV, - BOP_MOD, - BOP_EQ, - BOP_EQQ, - BOP_LT, - BOP_LE, - BOP_LTLT, - BOP_AREF, - BOP_ASET, - BOP_LENGTH, - BOP_SIZE, - BOP_EMPTY_P, - BOP_SUCC, - BOP_GT, - BOP_GE, - BOP_NOT, - BOP_NEQ, - BOP_MATCH, - BOP_FREEZE, - - BOP_LAST_ -}; - #define GetVMPtr(obj, ptr) \ GetCoreDataFromValue((obj), rb_vm_t, (ptr)) @@ -441,7 +415,7 @@ typedef struct rb_vm_struct { size_t fiber_machine_stack_size; } default_params; - short redefined_flag[BOP_LAST_]; + rb_om_bitmap_t redefined_flag[OM_SIZE_]; } rb_vm_t; /* default values */ @@ -458,18 +432,8 @@ typedef struct rb_vm_struct { #define RUBY_VM_FIBER_MACHINE_STACK_SIZE ( 64 * 1024 * sizeof(VALUE)) /* 256 KB or 512 KB */ #define RUBY_VM_FIBER_MACHINE_STACK_SIZE_MIN ( 16 * 1024 * sizeof(VALUE)) /* 64 KB or 128 KB */ -/* optimize insn */ -#define FIXNUM_REDEFINED_OP_FLAG (1 << 0) -#define FLOAT_REDEFINED_OP_FLAG (1 << 1) -#define STRING_REDEFINED_OP_FLAG (1 << 2) -#define ARRAY_REDEFINED_OP_FLAG (1 << 3) -#define HASH_REDEFINED_OP_FLAG (1 << 4) -#define BIGNUM_REDEFINED_OP_FLAG (1 << 5) -#define SYMBOL_REDEFINED_OP_FLAG (1 << 6) -#define TIME_REDEFINED_OP_FLAG (1 << 7) -#define REGEXP_REDEFINED_OP_FLAG (1 << 8) - -#define BASIC_OP_UNREDEFINED_P(op, klass) (LIKELY((GET_VM()->redefined_flag[(op)]&(klass)) == 0)) +#define BASIC_OP_UNREDEFINED_P(mid, klass) \ + rb_basic_op_unredefined_p(OM_##mid##__##klass) #ifndef VM_DEBUG_BP_CHECK #define VM_DEBUG_BP_CHECK 0 diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 05ed3c6..2aedb46 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ 
-872,17 +872,17 @@ VALUE opt_eq_func(VALUE recv, VALUE obj, CALL_INFO ci) { if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_EQ, FIXNUM_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idEq, Fixnum)) { return (recv == obj) ? Qtrue : Qfalse; } else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_EQ, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idEq, Float)) { return (recv == obj) ? Qtrue : Qfalse; } else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && - BASIC_OP_UNREDEFINED_P(BOP_EQ, FLOAT_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idEq, Float)) { double a = RFLOAT_VALUE(recv); double b = RFLOAT_VALUE(obj); @@ -893,7 +893,7 @@ opt_eq_func(VALUE recv, VALUE obj, CALL_INFO ci) } else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString && - BASIC_OP_UNREDEFINED_P(BOP_EQ, STRING_REDEFINED_OP_FLAG)) { + BASIC_OP_UNREDEFINED_P(idEq, String)) { return rb_str_equal(recv, obj); } } diff --git a/vm_insnhelper.h b/vm_insnhelper.h index 31f8ffc..a4290ee 100644 --- a/vm_insnhelper.h +++ b/vm_insnhelper.h @@ -229,5 +229,28 @@ enum vm_regan_acttype { static VALUE make_no_method_exception(VALUE exc, const char *format, VALUE obj, int argc, const VALUE *argv); - +static inline int +rb_basic_op_unredefined_p(enum ruby_optimized_method om) +{ + unsigned int i = om / OM_ALIGN_; + rb_om_bitmap_t mask = (rb_om_bitmap_t)(1U << (om % OM_ALIGN_)); + + return LIKELY((GET_VM()->redefined_flag[i] & mask) == 0); +} + +static inline int +rb_basic_mask_unredefined_p(enum ruby_optimized_method om) +{ + unsigned int uom = (unsigned int)-om; + unsigned int offset = 0xffU & (uom >> OM_ALIGN_); + rb_om_bitmap_t mask = (rb_om_bitmap_t)(OM_GETMASK_ & uom); + + return LIKELY((GET_VM()->redefined_flag[offset] & mask) == 0); +} + +static inline int +rb_opt_method_is_mask(enum ruby_optimized_method om) +{ + return !!((int)om < 0); +} #endif /* RUBY_INSNHELPER_H */