dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize
@ 2014-10-17  6:19 Eric Wong
  2014-10-17  6:19 ` [PATCH 02/13] add generic and flexible opt_str_lit insn Eric Wong
                   ` (11 more replies)
  0 siblings, 12 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

`"literal".freeze', `obj["literal"]', and `obj["literal"] = val'
are all peephole optimizations and not appropriate for
iseq_compile_each.
---
 compile.c                      | 66 ++++++++++++++++++++++--------------------
 test/-ext-/symbol/test_type.rb |  1 +
 test/objspace/test_objspace.rb |  1 +
 test/ruby/envutil.rb           | 10 +++++++
 test/ruby/test_hash.rb         |  2 ++
 test/ruby/test_iseq.rb         |  1 +
 test/ruby/test_string.rb       |  7 +++++
 7 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/compile.c b/compile.c
index 8df7acf..bd8f75f 100644
--- a/compile.c
+++ b/compile.c
@@ -1819,6 +1819,41 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 	    }
 	}
     }
+
+    /* string literal optimizations */
+    if (iobj->insn_id == BIN(putstring)) {
+	INSN *niobj = (INSN *)get_next_insn((INSN *)list);
+
+	if (niobj && niobj->insn_id == BIN(send)) {
+	    rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
+
+	    if (ci->blockiseq == 0 &&
+		(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP) == 0) {
+
+		/* "literal".freeze -> opt_str_freeze("literal") */
+		if (ci->mid == idFreeze && ci->orig_argc == 0) {
+		    iobj->insn_id = BIN(opt_str_freeze);
+		    REMOVE_ELEM((LINK_ELEMENT *)niobj);
+		}
+
+		/* obj["literal"] -> opt_aref_with(obj, "literal") */
+		else if (ci->mid == idAREF && ci->orig_argc == 1) {
+		    VALUE *old_operands = iobj->operands;
+
+		    iobj->insn_id = BIN(opt_aref_with);
+		    iobj->operand_size = insn_len(iobj->insn_id) - 1;
+
+		    iobj->operands = (VALUE *)compile_data_alloc(iseq,
+					iobj->operand_size * sizeof(VALUE));
+		    iobj->operands[0] = (VALUE)ci;
+		    iobj->operands[1] = old_operands[0];
+
+		    REMOVE_ELEM((LINK_ELEMENT *)niobj);
+		}
+	    }
+	}
+    }
+
     return COMPILE_OK;
 }
 
@@ -4238,37 +4273,6 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	break;
       }
       case NODE_CALL:
-	/* optimization shortcut
-	 *   "literal".freeze -> opt_str_freeze("literal")
-	 */
-	if (node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
-	    node->nd_mid == idFreeze && node->nd_args == NULL)
-	{
-	    VALUE str = rb_fstring(node->nd_recv->nd_lit);
-	    iseq_add_mark_object(iseq, str);
-	    ADD_INSN1(ret, line, opt_str_freeze, str);
-	    if (poped) {
-		ADD_INSN(ret, line, pop);
-	    }
-	    break;
-	}
-	/* optimization shortcut
-	 *   obj["literal"] -> opt_aref_with(obj, "literal")
-	 */
-	if (node->nd_mid == idAREF && !private_recv_p(node) && node->nd_args &&
-	    nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 1 &&
-	    nd_type(node->nd_args->nd_head) == NODE_STR)
-	{
-	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
-	    node->nd_args->nd_head->nd_lit = str;
-	    COMPILE(ret, "recv", node->nd_recv);
-	    ADD_INSN2(ret, line, opt_aref_with,
-		      new_callinfo(iseq, idAREF, 1, 0, 0), str);
-	    if (poped) {
-		ADD_INSN(ret, line, pop);
-	    }
-	    break;
-	}
       case NODE_FCALL:
       case NODE_VCALL:{		/* VCALL: variable or call */
 	/*
diff --git a/test/-ext-/symbol/test_type.rb b/test/-ext-/symbol/test_type.rb
index f1749f5..5bd79b8 100644
--- a/test/-ext-/symbol/test_type.rb
+++ b/test/-ext-/symbol/test_type.rb
@@ -4,6 +4,7 @@ require "-test-/symbol"
 module Test_Symbol
   class TestType < Test::Unit::TestCase
     def test_id2str_fstring_bug9171
+      require_compile_option(:peephole_optimization)
       fstr = eval("# encoding: us-ascii
         'foobar'.freeze")
       assert_same fstr, Bug::Symbol.id2str(:foobar)
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index 8a5ed34..faacf48 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -195,6 +195,7 @@ class TestObjSpace < Test::Unit::TestCase
   end
 
   def test_dump_flags
+    require_compile_option(:peephole_optimization)
     info = ObjectSpace.dump("foo".freeze)
     assert_match /"wb_protected":true, "old":true, "long_lived":true, "marked":true/, info
     assert_match /"fstring":true/, info
diff --git a/test/ruby/envutil.rb b/test/ruby/envutil.rb
index f5fbb7c..bddaf82 100644
--- a/test/ruby/envutil.rb
+++ b/test/ruby/envutil.rb
@@ -520,6 +520,16 @@ eom
         end
       end
 
+      def require_compile_option(opt)
+        case RubyVM::InstructionSequence.compile_option[opt]
+        when true
+        when false
+          skip(":#{opt} disabled")
+        else
+          raise ArgumentError, "unrecognized compile option: #{opt.inspect}"
+        end
+      end
+
       class << (AssertFile = Struct.new(:failure_message).new)
         include Assertions
         def assert_file_predicate(predicate, *args)
diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index 4431552..bb7e8b5 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -216,6 +216,7 @@ class TestHash < Test::Unit::TestCase
   end
 
   def test_AREF_fstring_key
+    require_compile_option(:peephole_optimization)
     h = {"abc" => 1}
     before = GC.stat(:total_allocated_objects)
     5.times{ h["abc"] }
@@ -230,6 +231,7 @@ class TestHash < Test::Unit::TestCase
   end
 
   def test_NEWHASH_fstring_key
+    require_compile_option(:peephole_optimization)
     a = {"ABC" => :t}
     b = {"ABC" => :t}
     assert_same a.keys[0], b.keys[0]
diff --git a/test/ruby/test_iseq.rb b/test/ruby/test_iseq.rb
index 94a814c..ac1c417 100644
--- a/test/ruby/test_iseq.rb
+++ b/test/ruby/test_iseq.rb
@@ -118,6 +118,7 @@ class TestISeq < Test::Unit::TestCase
   end
 
   def test_label_fstring
+    require_compile_option(:peephole_optimization)
     c = Class.new{ def foobar() end }
 
     a, b = eval("# encoding: us-ascii\n'foobar'.freeze"),
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index d82d2bc..4dc790f 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1908,6 +1908,13 @@ class TestString < Test::Unit::TestCase
     }
   end
 
+  def test_literal_freeze
+    require_compile_option(:peephole_optimization)
+    before = GC.stat(:total_allocated_objects)
+    5.times { "".freeze }
+    assert_equal before, GC.stat(:total_allocated_objects)
+  end
+
   class S2 < String
   end
   def test_str_new4
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 02/13] add generic and flexible opt_str_lit insn
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 03/13] compile.c: optimize << and == using putstring_for Eric Wong
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

A new opt_str_lit instruction may replace all current uses of:

* opt_str_freeze
* opt_aref_with
* opt_aset_with

This new instruction should also be usable to implement new
optimizations to avoid rb_str_resurrect.

Optimizations for literal hash["literal"] (aref/lookup) and
"literal".freeze are easily moved to the peephole optimizer.

However, it seems easier to optimize `hash["literal"] = val'
in iseq_compile_each right now.

This slightly reduces performance compared to the old opt_aref_with
and opt_aset_with instructions, but is more elegant in avoiding
special cases.  We may decide to resurrect opt_aref_with
and opt_aset_with if we want to recover the small performance loss
and can accept a bigger VM loop.

"".freeze performance is probably not interesting to anyone :)

benchmark results:
minimum results in each 5 measurements.
Execution time (sec)
name                    2.1.3   trunk   built
loop_whileloop2         0.106   0.106   0.106
vm2_hash_aref_lit*      0.503   0.162   0.192
vm2_hash_aset_lit*      0.587   0.214   0.241

Speedup ratio: compare with the result of `2.1.3' (greater is better)
name                    trunk   built
loop_whileloop2         1.000   0.998
vm2_hash_aref_lit*      3.099   2.621
vm2_hash_aset_lit*      2.741   2.435

raw data:

[["loop_whileloop2",
  [[0.10656525194644928,
    0.10594194941222668,
    0.10586611740291119,
    0.1066869841888547,
    0.10577277280390263],
   [0.1066260114312172,
    0.11153125017881393,
    0.1057466259226203,
    0.10648809000849724,
    0.10654668044298887],
   [0.10622590780258179,
    0.10608386714011431,
    0.105999612249434,
    0.10603114310652018,
    0.10603212099522352]]],
 ["vm2_hash_aref_lit",
  [[0.6088160118088126,
    0.6084766369313002,
    0.6114963851869106,
    0.6098374016582966,
    0.6085139447823167],
   [0.27083833049982786,
    0.2680424079298973,
    0.27930730395019054,
    0.26884936541318893,
    0.26798537466675043],
   [0.303865535184741,
    0.31264861673116684,
    0.2977857915684581,
    0.29944207333028316,
    0.30319013725966215]]],
 ["vm2_hash_aset_lit",
  [[0.6943842126056552,
    0.6933871945366263,
    0.694433419033885,
    0.6946460604667664,
    0.6931405374780297],
   [0.32219766546040773,
    0.3211979949846864,
    0.32000005338341,
    0.3278619237244129,
    0.3314230963587761],
   [0.3476126240566373,
    0.3471973007544875,
    0.3666635127738118,
    0.3474232777953148,
    0.3474671710282564]]]]
---
 benchmark/bm_vm2_hash_aref_lit.rb |   6 +++
 benchmark/bm_vm2_hash_aset_lit.rb |   6 +++
 compile.c                         |  74 ++++++++++++++++++---------
 insns.def                         | 102 +++++++++++++++++---------------------
 4 files changed, 108 insertions(+), 80 deletions(-)
 create mode 100644 benchmark/bm_vm2_hash_aref_lit.rb
 create mode 100644 benchmark/bm_vm2_hash_aset_lit.rb

diff --git a/benchmark/bm_vm2_hash_aref_lit.rb b/benchmark/bm_vm2_hash_aref_lit.rb
new file mode 100644
index 0000000..a6d4d12
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aref_lit.rb
@@ -0,0 +1,6 @@
+h = { "foo" => nil }
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h["foo"]
+end
diff --git a/benchmark/bm_vm2_hash_aset_lit.rb b/benchmark/bm_vm2_hash_aset_lit.rb
new file mode 100644
index 0000000..58339ec
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aset_lit.rb
@@ -0,0 +1,6 @@
+h = {}
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h["foo"] = nil
+end
diff --git a/compile.c b/compile.c
index bd8f75f..b5d3152 100644
--- a/compile.c
+++ b/compile.c
@@ -1703,6 +1703,32 @@ get_prev_insn(INSN *iobj)
     return 0;
 }
 
+static void
+opt_str_lit_recv(rb_iseq_t *iseq, INSN *iobj,
+		enum ruby_basic_operators bop, int redef_flag, ID mid)
+{
+    VALUE recv_info = rb_ary_new_from_args(4,
+	    iobj->operands[0], INT2FIX(bop), INT2FIX(redef_flag), ID2SYM(mid));
+    OBJ_FREEZE(recv_info);
+    iobj->insn_id = BIN(opt_str_lit);
+    iobj->operands[0] = recv_info;
+    iseq_add_mark_object(iseq, recv_info); /* XXX check if needed */
+}
+
+static void
+opt_str_lit_arg(rb_iseq_t *iseq, INSN *iobj,
+		enum ruby_basic_operators bop, int redef_flag,
+		VALUE klass, int recv_off)
+{
+    VALUE recv_info = rb_ary_new_from_args(5,
+	    iobj->operands[0], INT2FIX(bop), INT2FIX(redef_flag),
+	    klass, INT2FIX(recv_off));
+    OBJ_FREEZE(recv_info);
+    iobj->insn_id = BIN(opt_str_lit);
+    iobj->operands[0] = recv_info;
+    iseq_add_mark_object(iseq, recv_info); /* XXX check if needed */
+}
+
 static int
 iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcallopt)
 {
@@ -1827,28 +1853,19 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 	if (niobj && niobj->insn_id == BIN(send)) {
 	    rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
 
-	    if (ci->blockiseq == 0 &&
-		(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP) == 0) {
+	    if (ci->blockiseq == 0 && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) {
 
-		/* "literal".freeze -> opt_str_freeze("literal") */
+		/* "literal".freeze -> opt_str_lit("literal", :freeze, ...) */
 		if (ci->mid == idFreeze && ci->orig_argc == 0) {
-		    iobj->insn_id = BIN(opt_str_freeze);
+		    opt_str_lit_recv(iseq, iobj, BOP_FREEZE,
+				    STRING_REDEFINED_OP_FLAG, ci->mid);
 		    REMOVE_ELEM((LINK_ELEMENT *)niobj);
 		}
 
-		/* obj["literal"] -> opt_aref_with(obj, "literal") */
+		/* obj["literal"] -> opt_str_lit("literal", Hash, 0) */
 		else if (ci->mid == idAREF && ci->orig_argc == 1) {
-		    VALUE *old_operands = iobj->operands;
-
-		    iobj->insn_id = BIN(opt_aref_with);
-		    iobj->operand_size = insn_len(iobj->insn_id) - 1;
-
-		    iobj->operands = (VALUE *)compile_data_alloc(iseq,
-					iobj->operand_size * sizeof(VALUE));
-		    iobj->operands[0] = (VALUE)ci;
-		    iobj->operands[1] = old_operands[0];
-
-		    REMOVE_ELEM((LINK_ELEMENT *)niobj);
+		    opt_str_lit_arg(iseq, iobj, BOP_AREF,
+				    HASH_REDEFINED_OP_FLAG, rb_cHash, 0);
 		}
 	    }
 	}
@@ -5245,23 +5262,34 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	int asgnflag;
 
 	/* optimization shortcut
-	 *   obj["literal"] = value -> opt_aset_with(obj, "literal", value)
+	 *   obj["literal"] = val -> send(obj, :[]=, opt_str_lit("lit"), val)
+	 * TODO: ideally this should be done inside iseq_peephole_optimize,
+	 * but that would require a lot of scanning as the `val' (2nd arg)
+	 * is of variable distance between the :putstring and :send insns
 	 */
-	if (node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
+	if (iseq->compile_data->option->peephole_optimization &&
+	    node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
 	    nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 2 &&
 	    nd_type(node->nd_args->nd_head) == NODE_STR)
 	{
 	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
+	    VALUE recv_info = rb_ary_new_from_args(5, str,
+		    INT2FIX(BOP_ASET), INT2FIX(HASH_REDEFINED_OP_FLAG),
+		    rb_cHash, INT2FIX(0));
+
 	    node->nd_args->nd_head->nd_lit = str;
-	    iseq_add_mark_object(iseq, str);
+	    iseq_add_mark_object(iseq, recv_info);
+	    if (!poped) {
+		ADD_INSN(ret, line, putnil);
+	    }
 	    COMPILE(ret, "recv", node->nd_recv);
+	    ADD_INSN1(ret, line, opt_str_lit, recv_info);
 	    COMPILE(ret, "value", node->nd_args->nd_next->nd_head);
 	    if (!poped) {
-		ADD_INSN(ret, line, swap);
-		ADD_INSN1(ret, line, topn, INT2FIX(1));
+		ADD_INSN1(ret, line, setn, INT2FIX(3));
 	    }
-	    ADD_INSN2(ret, line, opt_aset_with,
-		      new_callinfo(iseq, idASET, 2, 0, 0), str);
+	    flag = VM_CALL_ARGS_SKIP_SETUP;
+	    ADD_SEND_R(ret, line, node->nd_mid, 2, 0, INT2FIX(flag));
 	    ADD_INSN(ret, line, pop);
 	    break;
 	}
diff --git a/insns.def b/insns.def
index bfa11a9..9a98bf8 100644
--- a/insns.def
+++ b/insns.def
@@ -356,6 +356,51 @@ putstring
 
 /**
   @c put
+  @e put string val. string may be created depending on recv_info conditions
+ */
+DEFINE_INSN
+opt_str_lit
+(VALUE recv_info)
+()
+(VALUE val)
+{
+    /*
+     * recv_info:
+     * 0 - str
+     * 1 - basic operator flag (BOP_*)
+     * 2 - redefined flag (*_REDEFINED_OP_FLAG)
+     * 3 - Class (optimized receiver class) or Symbol (method name)
+     * 4 - stack offset (Fixint), only present if [3] is a Class
+     */
+    const VALUE *ri = RARRAY_CONST_PTR(recv_info);
+    enum ruby_basic_operators bop = FIX2INT(ri[1]);
+    int redef_flag = FIX2INT(ri[2]);
+    VALUE msym_or_class = ri[3];
+
+    val = ri[0]; /* hopefully, this is the only val assignment we need */
+
+    /* check if the receiver is an on-stack object: */
+    if (!SYMBOL_P(msym_or_class)) {
+	VALUE recv = TOPN(FIX2INT(ri[4]));
+
+	if (SPECIAL_CONST_P(recv) ||
+		RBASIC_CLASS(recv) != msym_or_class ||
+		!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+	    /* bad, somebody redefined an optimized method, slow path: */
+	    val = rb_str_resurrect(val);
+	}
+    }
+    else { /* receiver is the string literal itself (e.g. "str".freeze) */
+	if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+	    /* bad, somebody redefined an optimized method, slow path: */
+	    val = rb_str_resurrect(val);
+	    val = rb_funcall(val, SYM2ID(msym_or_class), 0);
+	}
+    }
+}
+
+/**
+  @c put
   @e put concatenate strings
   @j スタックトップの文字列を n 個連結し,結果をスタックにプッシュする。
  */
@@ -999,20 +1044,6 @@ send
     CALL_METHOD(ci);
 }
 
-DEFINE_INSN
-opt_str_freeze
-(VALUE str)
-()
-(VALUE val)
-{
-    if (BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG)) {
-	val = str;
-    }
-    else {
-	val = rb_funcall(rb_str_resurrect(str), idFreeze, 0);
-    }
-}
-
 /**
   @c optimize
   @e Invoke method without block, splat
@@ -1903,49 +1934,6 @@ opt_aset
 
 /**
   @c optimize
-  @e recv[str] = set
-  @j 最適化された recv[str] = set。
- */
-DEFINE_INSN
-opt_aset_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv, VALUE val)
-(VALUE val)
-{
-    if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
-	rb_hash_aset(recv, key, val);
-    }
-    else {
-	PUSH(recv);
-	PUSH(rb_str_resurrect(key));
-	PUSH(val);
-	CALL_SIMPLE_METHOD(recv);
-    }
-}
-
-/**
-  @c optimize
-  @e recv[str]
-  @j 最適化された recv[str]。
- */
-DEFINE_INSN
-opt_aref_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv)
-(VALUE val)
-{
-    if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
-	val = rb_hash_aref(recv, key);
-    }
-    else {
-	PUSH(recv);
-	PUSH(rb_str_resurrect(key));
-	CALL_SIMPLE_METHOD(recv);
-    }
-}
-
-/**
-  @c optimize
   @e optimized length
   @j 最適化された recv.length()。
  */
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 03/13] compile.c: optimize << and == using putstring_for
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
  2014-10-17  6:19 ` [PATCH 02/13] add generic and flexible opt_str_lit insn Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 04/13] opt_str_lit: further optimizations and cleanups Eric Wong
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

This optimizes `obj << "literal"' and `obj == "literal"' calls.

Note: `"literal" == obj' is not optimized, yet.  We may have the
same problem as opt_aset_with in the peephole optimizer.
---
 benchmark/bm_vm2_strcat.rb |  7 ++++++
 benchmark/bm_vm2_streq1.rb |  6 +++++
 compile.c                  | 12 ++++++++++
 test/ruby/test_string.rb   | 56 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 81 insertions(+)
 create mode 100644 benchmark/bm_vm2_strcat.rb
 create mode 100644 benchmark/bm_vm2_streq1.rb

diff --git a/benchmark/bm_vm2_strcat.rb b/benchmark/bm_vm2_strcat.rb
new file mode 100644
index 0000000..b25ac6e
--- /dev/null
+++ b/benchmark/bm_vm2_strcat.rb
@@ -0,0 +1,7 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str << "const"
+  str.clear
+end
diff --git a/benchmark/bm_vm2_streq1.rb b/benchmark/bm_vm2_streq1.rb
new file mode 100644
index 0000000..2a4b0f8
--- /dev/null
+++ b/benchmark/bm_vm2_streq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo == "literal"
+end
diff --git a/compile.c b/compile.c
index b5d3152..d301579 100644
--- a/compile.c
+++ b/compile.c
@@ -1867,6 +1867,18 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 		    opt_str_lit_arg(iseq, iobj, BOP_AREF,
 				    HASH_REDEFINED_OP_FLAG, rb_cHash, 0);
 		}
+
+		/* optimize allocation: obj == "lit" */
+		else if (ci->mid == idEq && ci->orig_argc == 1) {
+		    opt_str_lit_arg(iseq, iobj, BOP_EQ,
+				    STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+		}
+
+		/* optimize allocation: obj << "lit" */
+		else if (ci->mid == idLTLT && ci->orig_argc == 1) {
+		    opt_str_lit_arg(iseq, iobj, BOP_LTLT,
+				    STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+		}
 	    }
 	}
     }
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 4dc790f..4165e97 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2279,6 +2279,33 @@ class TestString < Test::Unit::TestCase
     end;
   end if [0].pack("l!").bytesize < [nil].pack("p").bytesize
   # enable only when string size range is smaller than memory space
+
+  def test_opt_strcat_with
+    assert_separately([], <<-RUBY)
+      class String
+        undef <<
+        def <<(str)
+          "overridden"
+        end
+      end
+      assert_equal("overridden", "" << "foo")
+      foo = "foo"
+      assert_equal("overridden", foo << "bar")
+    RUBY
+
+    if @cls == String
+      nr = 10
+      recv = ""
+      before = GC.stat(:total_allocated_objects)
+      nr.times { recv << "constant" }
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal "constant" * nr, recv
+
+      before = GC.stat(:total_allocated_objects)
+      nr.times { "recv" << "constant" }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+    end
+  end
 end
 
 class TestString2 < TestString
@@ -2286,4 +2313,33 @@ class TestString2 < TestString
     super
     @cls = S2
   end
+
+  def test_opt_streq1
+    assert_separately([], <<-RUBY)
+      class String
+        undef ==
+        def ==(str)
+          :TROO
+        end
+      end
+      assert_equal(:TROO, ("foo" == "foo"))
+    RUBY
+
+    if @cls == String
+      nr = 10
+
+      recv = "something"
+      res = []
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv == "constant") }
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal [ false ], res.uniq!
+
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv == "something") }
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal [ true ], res.uniq!
+    end
+  end
 end
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 04/13] opt_str_lit: further optimizations and cleanups
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
  2014-10-17  6:19 ` [PATCH 02/13] add generic and flexible opt_str_lit insn Eric Wong
  2014-10-17  6:19 ` [PATCH 03/13] compile.c: optimize << and == using putstring_for Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 05/13] opt_str_lit: optimize allocations for +, %, * and === calls Eric Wong
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

Optimize the following:

* "string" == obj
* "string" != obj
*  obj == "string"
*  obj != "string"
* "string".size
* "string".length

`"string" == obj' should have roughly the same performance as the more
common `obj == "string"' comparison.  Likewise for cases where != is
called on a literal string.  These are all easy-to-optimize without
requiring any new VM instructions.

The size and length optimizations are probably not needed
frequently, but they were trivial and require no new VM instructions,
either.
---
 benchmark/bm_vm2_streq2.rb |   6 ++
 compile.c                  | 150 ++++++++++++++++++++++++++++++++-------------
 insns.def                  |  40 +++++++-----
 test/ruby/test_string.rb   |  47 ++++++++++++--
 4 files changed, 181 insertions(+), 62 deletions(-)
 create mode 100644 benchmark/bm_vm2_streq2.rb

diff --git a/benchmark/bm_vm2_streq2.rb b/benchmark/bm_vm2_streq2.rb
new file mode 100644
index 0000000..986020d
--- /dev/null
+++ b/benchmark/bm_vm2_streq2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "literal" == foo
+end
diff --git a/compile.c b/compile.c
index d301579..50f9777 100644
--- a/compile.c
+++ b/compile.c
@@ -1703,30 +1703,43 @@ get_prev_insn(INSN *iobj)
     return 0;
 }
 
-static void
-opt_str_lit_recv(rb_iseq_t *iseq, INSN *iobj,
+static VALUE
+new_recvinfo_for_put(rb_iseq_t *iseq, VALUE str,
+		enum ruby_basic_operators bop, int redef_flag)
+{
+    VALUE ri = rb_ary_new_from_args(3, str, INT2FIX(bop), INT2FIX(redef_flag));
+
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
+}
+
+static VALUE
+new_recvinfo_for_call(rb_iseq_t *iseq, VALUE str,
 		enum ruby_basic_operators bop, int redef_flag, ID mid)
 {
-    VALUE recv_info = rb_ary_new_from_args(4,
-	    iobj->operands[0], INT2FIX(bop), INT2FIX(redef_flag), ID2SYM(mid));
-    OBJ_FREEZE(recv_info);
-    iobj->insn_id = BIN(opt_str_lit);
-    iobj->operands[0] = recv_info;
-    iseq_add_mark_object(iseq, recv_info); /* XXX check if needed */
+    VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(bop), INT2FIX(redef_flag),
+				    ID2SYM(mid));
+
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
 }
 
-static void
-opt_str_lit_arg(rb_iseq_t *iseq, INSN *iobj,
+static VALUE
+new_recvinfo_for_arg(rb_iseq_t *iseq, VALUE str,
 		enum ruby_basic_operators bop, int redef_flag,
 		VALUE klass, int recv_off)
 {
-    VALUE recv_info = rb_ary_new_from_args(5,
-	    iobj->operands[0], INT2FIX(bop), INT2FIX(redef_flag),
-	    klass, INT2FIX(recv_off));
-    OBJ_FREEZE(recv_info);
-    iobj->insn_id = BIN(opt_str_lit);
-    iobj->operands[0] = recv_info;
-    iseq_add_mark_object(iseq, recv_info); /* XXX check if needed */
+    VALUE ri = rb_ary_new_from_args(5, str, INT2FIX(bop), INT2FIX(redef_flag),
+				    klass, INT2FIX(recv_off));
+
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
 }
 
 static int
@@ -1853,31 +1866,61 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 	if (niobj && niobj->insn_id == BIN(send)) {
 	    rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
 
-	    if (ci->blockiseq == 0 && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) {
-
-		/* "literal".freeze -> opt_str_lit("literal", :freeze, ...) */
-		if (ci->mid == idFreeze && ci->orig_argc == 0) {
-		    opt_str_lit_recv(iseq, iobj, BOP_FREEZE,
-				    STRING_REDEFINED_OP_FLAG, ci->mid);
-		    REMOVE_ELEM((LINK_ELEMENT *)niobj);
-		}
-
-		/* obj["literal"] -> opt_str_lit("literal", Hash, 0) */
-		else if (ci->mid == idAREF && ci->orig_argc == 1) {
-		    opt_str_lit_arg(iseq, iobj, BOP_AREF,
+	    if (!ci->blockiseq && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) {
+		VALUE ri = Qfalse;
+		VALUE str = iobj->operands[0];
+
+		switch (ci->orig_argc) {
+		  case 0:
+		    /*
+		     * optimize:
+		     * "literal".freeze
+		     * "literal".size
+		     * "literal".length
+		     */
+		    switch (ci->mid) {
+		      case idFreeze:
+			ri = new_recvinfo_for_call(iseq, str, BOP_FREEZE,
+					STRING_REDEFINED_OP_FLAG, ci->mid);
+			REMOVE_ELEM((LINK_ELEMENT *)niobj);
+			break;
+		      case idSize:
+			ri = new_recvinfo_for_put(iseq, str, BOP_SIZE,
+					STRING_REDEFINED_OP_FLAG);
+			break;
+		      case idLength:
+			ri = new_recvinfo_for_put(iseq, str, BOP_LENGTH,
+					STRING_REDEFINED_OP_FLAG);
+			break;
+		    }
+		    break;
+		  case 1:
+		    switch (ci->mid) {
+		      case idAREF:
+			/* optimize allocation: obj["lit"] */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_AREF,
 				    HASH_REDEFINED_OP_FLAG, rb_cHash, 0);
-		}
-
-		/* optimize allocation: obj == "lit" */
-		else if (ci->mid == idEq && ci->orig_argc == 1) {
-		    opt_str_lit_arg(iseq, iobj, BOP_EQ,
+			break;
+		      case idEq:
+			/* optimize allocation: obj == "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_EQ,
 				    STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			break;
+		      case idNeq:
+			/* optimize allocation: obj != "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_NEQ,
+				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			break;
+		      case idLTLT:
+			/* optimize allocation: obj << "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_LTLT,
+				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			break;
+		    }
 		}
-
-		/* optimize allocation: obj << "lit" */
-		else if (ci->mid == idLTLT && ci->orig_argc == 1) {
-		    opt_str_lit_arg(iseq, iobj, BOP_LTLT,
-				    STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+		if (ri != Qfalse) {
+		    iobj->insn_id = BIN(opt_str_lit);
+		    iobj->operands[0] = ri;
 		}
 	    }
 	}
@@ -4385,7 +4428,27 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 #endif
 	/* receiver */
 	if (type == NODE_CALL) {
-	    COMPILE(recv, "recv", node->nd_recv);
+	    /*
+	     * optimize:
+	     *   "yoda" == other -> opt_str_lit("yoda").send(:==, other)
+	     *   "yoda" != other -> opt_str_lit("yoda").send(:!=, other)
+	     */
+	    if (iseq->compile_data->option->peephole_optimization &&
+		(mid == idEq || mid == idNeq) &&
+		!private_recv_p(node) &&
+		node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
+		node->nd_args && nd_type(node->nd_args) == NODE_ARRAY &&
+		node->nd_args->nd_alen == 1)
+	    {
+		VALUE yoda = rb_fstring(node->nd_recv->nd_lit);
+		VALUE recv_info = new_recvinfo_for_put(iseq, yoda,
+					    BOP_EQ, STRING_REDEFINED_OP_FLAG);
+
+		node->nd_recv->nd_lit = yoda;
+		ADD_INSN1(recv, line, opt_str_lit, recv_info);
+	    } else {
+		COMPILE(recv, "recv", node->nd_recv);
+	    }
 	}
 	else if (type == NODE_FCALL || type == NODE_VCALL) {
 	    ADD_CALL_RECEIVER(recv, line);
@@ -5285,12 +5348,11 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	    nd_type(node->nd_args->nd_head) == NODE_STR)
 	{
 	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
-	    VALUE recv_info = rb_ary_new_from_args(5, str,
-		    INT2FIX(BOP_ASET), INT2FIX(HASH_REDEFINED_OP_FLAG),
-		    rb_cHash, INT2FIX(0));
+	    VALUE recv_info = new_recvinfo_for_arg(iseq, str,
+					    BOP_ASET, HASH_REDEFINED_OP_FLAG,
+					    rb_cHash, 0);
 
 	    node->nd_args->nd_head->nd_lit = str;
-	    iseq_add_mark_object(iseq, recv_info);
 	    if (!poped) {
 		ADD_INSN(ret, line, putnil);
 	    }
diff --git a/insns.def b/insns.def
index 9a98bf8..f6740f8 100644
--- a/insns.def
+++ b/insns.def
@@ -369,32 +369,44 @@ opt_str_lit
      * 0 - str
      * 1 - basic operator flag (BOP_*)
      * 2 - redefined flag (*_REDEFINED_OP_FLAG)
+     * optional:
      * 3 - Class (optimized receiver class) or Symbol (method name)
-     * 4 - stack offset (Fixint), only present if [3] is a Class
+     * 4 - stack offset (Fixint), only present if [3] is a Class,
+     *     -1 stack offset means receiver is the frozen string literal itself
      */
     const VALUE *ri = RARRAY_CONST_PTR(recv_info);
+    long len = RARRAY_LEN(recv_info);
     enum ruby_basic_operators bop = FIX2INT(ri[1]);
     int redef_flag = FIX2INT(ri[2]);
-    VALUE msym_or_class = ri[3];
 
     val = ri[0]; /* hopefully, this is the only val assignment we need */
-
-    /* check if the receiver is an on-stack object: */
-    if (!SYMBOL_P(msym_or_class)) {
-	VALUE recv = TOPN(FIX2INT(ri[4]));
-
-	if (SPECIAL_CONST_P(recv) ||
-		RBASIC_CLASS(recv) != msym_or_class ||
-		!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
-	    /* bad, somebody redefined an optimized method, slow path: */
-	    val = rb_str_resurrect(val);
+    if (len > 3) {
+	VALUE msym_or_class = ri[3];
+
+	/* check if the receiver is an on-stack object: */
+	if (!SYMBOL_P(msym_or_class)) {
+	    int n = FIX2INT(ri[4]);
+	    VALUE recv = n < 0 ? val : TOPN(n);
+
+	    if (SPECIAL_CONST_P(recv) ||
+		    RBASIC_CLASS(recv) != msym_or_class ||
+		    !BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+		/* bad, somebody redefined an optimized method, slow path: */
+		val = rb_str_resurrect(val);
+	    }
+	}
+	else { /* receiver is the string literal itself (e.g. "str".freeze) */
+	    if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+		/* bad, somebody redefined an optimized method, slow path: */
+		val = rb_str_resurrect(val);
+		val = rb_funcall(val, SYM2ID(msym_or_class), 0);
+	    }
 	}
     }
-    else { /* receiver is the string literal itself (e.g. "str".freeze) */
+    else { /* string lit is receiver, but there are args */
 	if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
 	    /* bad, somebody redefined an optimized method, slow path: */
 	    val = rb_str_resurrect(val);
-	    val = rb_funcall(val, SYM2ID(msym_or_class), 0);
 	}
     }
 }
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 4165e97..c1ae343 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2314,7 +2314,7 @@ class TestString2 < TestString
     @cls = S2
   end
 
-  def test_opt_streq1
+  def test_opt_str_eq_neq
     assert_separately([], <<-RUBY)
       class String
         undef ==
@@ -2322,7 +2322,36 @@ class TestString2 < TestString
           :TROO
         end
       end
-      assert_equal(:TROO, ("foo" == "foo"))
+      foo = "foo"
+      assert_equal(:TROO, (foo == "foo"), 'string == "peephole 2nd pass"')
+      assert_equal(:TROO, ("foo" == foo), '"yoda 1st pass" == string')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef !=
+        def !=(str)
+          :NOT
+        end
+      end
+      foo = ""
+      assert_equal(:NOT, ("foo" != foo), '"yoda 1st pass" != string')
+      assert_equal(:NOT, (foo != "foo"), 'string != "peephole 2nd pass"')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef size
+        undef length
+        def size
+          42
+        end
+        def length
+          42
+        end
+      end
+      assert_equal(42, "".size, 'lit string size')
+      assert_equal(42, "".length, 'lit string size')
     RUBY
 
     if @cls == String
@@ -2331,13 +2360,23 @@ class TestString2 < TestString
       recv = "something"
       res = []
       before = GC.stat(:total_allocated_objects)
-      nr.times { res << (recv == "constant") }
+      nr.times { res << (recv == "constant") } # opt_streq1
+      nr.times { res << ("constant" == recv) } # opt_streq2
+      nr.times { res << ("something " != recv) } # 1st pass peephole
+      nr.times { res << (recv != "something") }  # 2nd pass peephole
       assert_equal before, GC.stat(:total_allocated_objects)
       assert_equal [ false ], res.uniq!
 
       res.clear
       before = GC.stat(:total_allocated_objects)
-      nr.times { res << (recv == "something") }
+      nr.times { res << (recv == "something") } # opt_streq1
+      nr.times { res << ("something" == recv) } # opt_streq2
+      nr.times { res << ("constant" != recv) } # 1st pass peephole
+      nr.times { res << (recv != "constant") } # 2nd pass peephole
+      nr.times { res << ("a" != "b") } # 1st pass peephole
+      nr.times { res << ("a" == "a") } # 1st pass peephole
+      nr.times { res << ("".size == 0) } # 2nd pass peephole
+      nr.times { res << ("".length == 0) } # 2nd pass peephole
       assert_equal before, GC.stat(:total_allocated_objects)
       assert_equal [ true ], res.uniq!
     end
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 05/13] opt_str_lit: optimize allocations for +, %, * and === calls
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
                   ` (2 preceding siblings ...)
  2014-10-17  6:19 ` [PATCH 04/13] opt_str_lit: further optimizations and cleanups Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 06/13] vm: automatically define optimized method enums Eric Wong
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

While experienced Rubyists know String#<< and interpolated
strings can avoid unnecessary object overhead, String#+ is
often easier to type and the first choice for some Rubyists.

Avoid penalizing users of String#+ unnecessarily
(but keep in mind using String#<< or interpolated strings
 can further reduce overhead).

It is also common to use literal format strings with '%',
so optimize allocations away from those calls.

Some users may use === directly for comparing strings, so
avoid allocating if they happen to use string literals.

While we're at it, optimizing allocations for String#* is utterly
trivial (one extra case), and I use `"lit" * Fixnum' sometimes.
---
 benchmark/bm_vm2_streqq1.rb  |  6 +++++
 benchmark/bm_vm2_streqq2.rb  |  6 +++++
 benchmark/bm_vm2_strfmt.rb   |  5 ++++
 benchmark/bm_vm2_strplus1.rb |  6 +++++
 benchmark/bm_vm2_strplus2.rb |  6 +++++
 compile.c                    | 32 ++++++++++++++++++++++--
 test/ruby/test_string.rb     | 59 +++++++++++++++++++++++++++++++++++++++++++-
 7 files changed, 117 insertions(+), 3 deletions(-)
 create mode 100644 benchmark/bm_vm2_streqq1.rb
 create mode 100644 benchmark/bm_vm2_streqq2.rb
 create mode 100644 benchmark/bm_vm2_strfmt.rb
 create mode 100644 benchmark/bm_vm2_strplus1.rb
 create mode 100644 benchmark/bm_vm2_strplus2.rb

diff --git a/benchmark/bm_vm2_streqq1.rb b/benchmark/bm_vm2_streqq1.rb
new file mode 100644
index 0000000..9183466
--- /dev/null
+++ b/benchmark/bm_vm2_streqq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo === "literal"
+end
diff --git a/benchmark/bm_vm2_streqq2.rb b/benchmark/bm_vm2_streqq2.rb
new file mode 100644
index 0000000..f48a9cd
--- /dev/null
+++ b/benchmark/bm_vm2_streqq2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "literal" === foo
+end
diff --git a/benchmark/bm_vm2_strfmt.rb b/benchmark/bm_vm2_strfmt.rb
new file mode 100644
index 0000000..efb88b6
--- /dev/null
+++ b/benchmark/bm_vm2_strfmt.rb
@@ -0,0 +1,5 @@
+i = 0
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "%d" % i
+end
diff --git a/benchmark/bm_vm2_strplus1.rb b/benchmark/bm_vm2_strplus1.rb
new file mode 100644
index 0000000..714efb8
--- /dev/null
+++ b/benchmark/bm_vm2_strplus1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo + "b"
+end
diff --git a/benchmark/bm_vm2_strplus2.rb b/benchmark/bm_vm2_strplus2.rb
new file mode 100644
index 0000000..c7f91ed
--- /dev/null
+++ b/benchmark/bm_vm2_strplus2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "b" + foo
+end
diff --git a/compile.c b/compile.c
index 50f9777..a40831c 100644
--- a/compile.c
+++ b/compile.c
@@ -1916,6 +1916,16 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 			ri = new_recvinfo_for_arg(iseq, str, BOP_LTLT,
 				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
 			break;
+		      case idPLUS:
+			/* optimize allocation: obj + "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_PLUS,
+				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			break;
+		      case idEqq:
+			/* optimize allocation: obj === "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_EQQ,
+				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			break;
 		    }
 		}
 		if (ri != Qfalse) {
@@ -3203,6 +3213,20 @@ build_postexe_iseq(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *body)
     return Qnil;
 }
 
+static enum ruby_basic_operators
+opt_str_lit_recv_bop(ID mid)
+{
+    switch (mid) {
+      case idEq: return BOP_EQ;
+      case idNeq: return BOP_NEQ;
+      case idPLUS: return BOP_PLUS;
+      case idMULT: return BOP_MULT;
+      case idMOD: return BOP_MOD;
+      case idEqq: return BOP_EQQ;
+    }
+    return BOP_LAST_;
+}
+
 /**
   compile each node
 
@@ -4428,13 +4452,17 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 #endif
 	/* receiver */
 	if (type == NODE_CALL) {
+	    enum ruby_basic_operators bop;
 	    /*
 	     * optimize:
 	     *   "yoda" == other -> opt_str_lit("yoda").send(:==, other)
 	     *   "yoda" != other -> opt_str_lit("yoda").send(:!=, other)
+	     *   "str" + other -> opt_str_lit("str").send(:+, other)
+	     *   "str" * other -> opt_str_lit("str").send(:*, other)
+	     *   "fmt" % args -> opt_str_lit("str").send(:%, other)
 	     */
 	    if (iseq->compile_data->option->peephole_optimization &&
-		(mid == idEq || mid == idNeq) &&
+		((bop = opt_str_lit_recv_bop(mid)) != BOP_LAST_) &&
 		!private_recv_p(node) &&
 		node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
 		node->nd_args && nd_type(node->nd_args) == NODE_ARRAY &&
@@ -4442,7 +4470,7 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	    {
 		VALUE yoda = rb_fstring(node->nd_recv->nd_lit);
 		VALUE recv_info = new_recvinfo_for_put(iseq, yoda,
-					    BOP_EQ, STRING_REDEFINED_OP_FLAG);
+					    bop, STRING_REDEFINED_OP_FLAG);
 
 		node->nd_recv->nd_lit = yoda;
 		ADD_INSN1(recv, line, opt_str_lit, recv_info);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index c1ae343..d3357b0 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2314,7 +2314,7 @@ class TestString2 < TestString
     @cls = S2
   end
 
-  def test_opt_str_eq_neq
+  def test_opt_str_lit
     assert_separately([], <<-RUBY)
       class String
         undef ==
@@ -2354,6 +2354,40 @@ class TestString2 < TestString
       assert_equal(42, "".length, 'lit string size')
     RUBY
 
+    assert_separately([], <<-RUBY)
+      class String
+        undef +
+        def +(other)
+          :plus
+        end
+      end
+      foo = "a"
+      assert_equal(:plus, "" + foo, 'lit plus')
+      assert_equal(:plus, foo + "", 'plus lit')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef *
+        def *(other)
+          :mult
+        end
+      end
+      assert_equal(:mult, "x" * 3, 'lit mult')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef ===
+        def ===(other)
+          other
+        end
+      end
+      str = "y"
+      assert_equal(false, "x" === false, 'lit threequal')
+      assert_equal("x", str === "x", 'threequal lit')
+    RUBY
+
     if @cls == String
       nr = 10
 
@@ -2363,6 +2397,8 @@ class TestString2 < TestString
       nr.times { res << (recv == "constant") } # opt_streq1
       nr.times { res << ("constant" == recv) } # opt_streq2
       nr.times { res << ("something " != recv) } # 1st pass peephole
+      nr.times { res << ("constant" == recv) } # opt_streq2
+      nr.times { res << ("constant" === recv) } # opt_streqq2
       nr.times { res << (recv != "something") }  # 2nd pass peephole
       assert_equal before, GC.stat(:total_allocated_objects)
       assert_equal [ false ], res.uniq!
@@ -2371,6 +2407,8 @@ class TestString2 < TestString
       before = GC.stat(:total_allocated_objects)
       nr.times { res << (recv == "something") } # opt_streq1
       nr.times { res << ("something" == recv) } # opt_streq2
+      nr.times { res << ("something" === recv) } # opt_streqq2
+      nr.times { res << (recv === "something") } # opt_streqq2
       nr.times { res << ("constant" != recv) } # 1st pass peephole
       nr.times { res << (recv != "constant") } # 2nd pass peephole
       nr.times { res << ("a" != "b") } # 1st pass peephole
@@ -2379,6 +2417,25 @@ class TestString2 < TestString
       nr.times { res << ("".length == 0) } # 2nd pass peephole
       assert_equal before, GC.stat(:total_allocated_objects)
       assert_equal [ true ], res.uniq!
+
+      # :+ optimizations
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << ("foo" + recv) }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_equal [ "foosomething" ], res.uniq!
+
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv + "foo") }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_equal [ "somethingfoo" ], res.uniq!
+
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << ('a' * 3) }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_equal [ "aaa" ], res.uniq!
     end
   end
 end
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 06/13] vm: automatically define optimized method enums
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
                   ` (3 preceding siblings ...)
  2014-10-17  6:19 ` [PATCH 05/13] opt_str_lit: optimize allocations for +, %, * and === calls Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 07/13] fix mismerge Eric Wong
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

This allows more flexible management of optimized methods instead
of constraining us to a 2-dimensional bitmap array where some
classes may have many optimized methods and others have few.
---
 common.mk                    |  18 +++++-
 compile.c                    |  81 ++++++++++++---------------
 defs/opt_method.def          |  49 ++++++++++++++++
 insns.def                    | 129 +++++++++++++++++++------------------------
 template/opt_method.h.tmpl   |  71 ++++++++++++++++++++++++
 template/opt_method.inc.tmpl |  49 ++++++++++++++++
 vm.c                         |  67 +++-------------------
 vm_core.h                    |  44 ++-------------
 vm_insnhelper.c              |   8 +--
 vm_insnhelper.h              |  25 ++++++++-
 10 files changed, 318 insertions(+), 223 deletions(-)
 create mode 100644 defs/opt_method.def
 create mode 100644 template/opt_method.h.tmpl
 create mode 100644 template/opt_method.inc.tmpl

diff --git a/common.mk b/common.mk
index ce01aca..0a533a5 100644
--- a/common.mk
+++ b/common.mk
@@ -639,7 +639,7 @@ PROBES_H_INCLUDES  = {$(VPATH)}probes.h
 VM_CORE_H_INCLUDES = {$(VPATH)}vm_core.h {$(VPATH)}thread_$(THREAD_MODEL).h \
 		     {$(VPATH)}node.h {$(VPATH)}method.h {$(VPATH)}ruby_atomic.h \
 	             {$(VPATH)}vm_debug.h {$(VPATH)}id.h {$(VPATH)}thread_native.h \
-	             $(CCAN_LIST_INCLUDES)
+	             $(CCAN_LIST_INCLUDES) {$(VPATH)}opt_method.h
 
 ###
 
@@ -826,7 +826,7 @@ vm.$(OBJEXT): {$(VPATH)}vm.c {$(VPATH)}gc.h {$(VPATH)}iseq.h \
   $(VM_CORE_H_INCLUDES) {$(VPATH)}vm_method.c {$(VPATH)}vm_eval.c \
   {$(VPATH)}vm_insnhelper.c {$(VPATH)}vm_insnhelper.h {$(VPATH)}vm_exec.c \
   {$(VPATH)}vm_exec.h {$(VPATH)}insns.def {$(VPATH)}vmtc.inc \
-  {$(VPATH)}vm.inc {$(VPATH)}insns.inc \
+  {$(VPATH)}vm.inc {$(VPATH)}insns.inc {$(VPATH)}opt_method.inc \
   {$(VPATH)}internal.h {$(VPATH)}vm.h {$(VPATH)}constant.h \
   $(PROBES_H_INCLUDES) {$(VPATH)}probes_helper.h {$(VPATH)}vm_opts.h
 vm_dump.$(OBJEXT): {$(VPATH)}vm_dump.c $(RUBY_H_INCLUDES) \
@@ -931,6 +931,20 @@ incs: $(INSNS) {$(VPATH)}node_name.inc {$(VPATH)}encdb.h {$(VPATH)}transdb.h {$(
 
 insns: $(INSNS)
 
+opt_method.h: $(srcdir)/tool/generic_erb.rb \
+		$(srcdir)/template/opt_method.h.tmpl \
+		$(srcdir)/defs/opt_method.def
+	$(ECHO) generating $@
+	$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
+		$(srcdir)/template/opt_method.h.tmpl
+
+opt_method.inc: $(srcdir)/tool/generic_erb.rb \
+		$(srcdir)/template/opt_method.inc.tmpl \
+		$(srcdir)/defs/opt_method.def
+	$(ECHO) generating $@
+	$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
+		$(srcdir)/template/opt_method.inc.tmpl
+
 id.h: $(srcdir)/tool/generic_erb.rb $(srcdir)/template/id.h.tmpl $(srcdir)/defs/id.def
 	$(ECHO) generating $@
 	$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
diff --git a/compile.c b/compile.c
index a40831c..7d6771b 100644
--- a/compile.c
+++ b/compile.c
@@ -1703,11 +1703,12 @@ get_prev_insn(INSN *iobj)
     return 0;
 }
 
+#define new_recvinfo_for_put(iseq,str,mid,klass) \
+    new_recvinfo_for_put_(iseq,str,OM_##mid##__##klass)
 static VALUE
-new_recvinfo_for_put(rb_iseq_t *iseq, VALUE str,
-		enum ruby_basic_operators bop, int redef_flag)
+new_recvinfo_for_put_(rb_iseq_t *iseq, VALUE str, enum ruby_optimized_method om)
 {
-    VALUE ri = rb_ary_new_from_args(3, str, INT2FIX(bop), INT2FIX(redef_flag));
+    VALUE ri = rb_ary_new_from_args(2, str, INT2FIX(om));
 
     hide_obj(ri);
     iseq_add_mark_object(iseq, ri);
@@ -1715,12 +1716,13 @@ new_recvinfo_for_put(rb_iseq_t *iseq, VALUE str,
     return ri;
 }
 
+#define new_recvinfo_for_call(iseq,str,mid,klass) \
+    new_recvinfo_for_call_((iseq),(str),OM_##mid##__##klass,(mid))
 static VALUE
-new_recvinfo_for_call(rb_iseq_t *iseq, VALUE str,
-		enum ruby_basic_operators bop, int redef_flag, ID mid)
+new_recvinfo_for_call_(rb_iseq_t *iseq, VALUE str,
+		    enum ruby_optimized_method om, ID mid)
 {
-    VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(bop), INT2FIX(redef_flag),
-				    ID2SYM(mid));
+    VALUE ri = rb_ary_new_from_args(3, str, INT2FIX(om), ID2SYM(mid));
 
     hide_obj(ri);
     iseq_add_mark_object(iseq, ri);
@@ -1728,12 +1730,13 @@ new_recvinfo_for_call(rb_iseq_t *iseq, VALUE str,
     return ri;
 }
 
+#define new_recvinfo_for_arg(iseq,str,mid,klass,off) \
+    new_recvinfo_for_arg_((iseq),(str),OM_##mid##__##klass,(rb_c##klass),(off))
 static VALUE
-new_recvinfo_for_arg(rb_iseq_t *iseq, VALUE str,
-		enum ruby_basic_operators bop, int redef_flag,
-		VALUE klass, int recv_off)
+new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str,
+		enum ruby_optimized_method om, VALUE klass, int recv_off)
 {
-    VALUE ri = rb_ary_new_from_args(5, str, INT2FIX(bop), INT2FIX(redef_flag),
+    VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(om),
 				    klass, INT2FIX(recv_off));
 
     hide_obj(ri);
@@ -1880,17 +1883,14 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 		     */
 		    switch (ci->mid) {
 		      case idFreeze:
-			ri = new_recvinfo_for_call(iseq, str, BOP_FREEZE,
-					STRING_REDEFINED_OP_FLAG, ci->mid);
+			ri = new_recvinfo_for_call(iseq, str, idFreeze, String);
 			REMOVE_ELEM((LINK_ELEMENT *)niobj);
 			break;
 		      case idSize:
-			ri = new_recvinfo_for_put(iseq, str, BOP_SIZE,
-					STRING_REDEFINED_OP_FLAG);
+			ri = new_recvinfo_for_put(iseq, str, idSize, String);
 			break;
 		      case idLength:
-			ri = new_recvinfo_for_put(iseq, str, BOP_LENGTH,
-					STRING_REDEFINED_OP_FLAG);
+			ri = new_recvinfo_for_put(iseq, str, idLength, String);
 			break;
 		    }
 		    break;
@@ -1898,33 +1898,27 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 		    switch (ci->mid) {
 		      case idAREF:
 			/* optimize allocation: obj["lit"] */
-			ri = new_recvinfo_for_arg(iseq, str, BOP_AREF,
-				    HASH_REDEFINED_OP_FLAG, rb_cHash, 0);
+			ri = new_recvinfo_for_arg(iseq, str, idAREF, Hash, 0);
 			break;
 		      case idEq:
 			/* optimize allocation: obj == "lit" */
-			ri = new_recvinfo_for_arg(iseq, str, BOP_EQ,
-				    STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			ri = new_recvinfo_for_arg(iseq, str, idEq, String, 0);
 			break;
 		      case idNeq:
 			/* optimize allocation: obj != "lit" */
-			ri = new_recvinfo_for_arg(iseq, str, BOP_NEQ,
-				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			ri = new_recvinfo_for_arg(iseq, str, idNeq, String, 0);
 			break;
 		      case idLTLT:
 			/* optimize allocation: obj << "lit" */
-			ri = new_recvinfo_for_arg(iseq, str, BOP_LTLT,
-				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			ri = new_recvinfo_for_arg(iseq, str, idLTLT, String, 0);
 			break;
 		      case idPLUS:
 			/* optimize allocation: obj + "lit" */
-			ri = new_recvinfo_for_arg(iseq, str, BOP_PLUS,
-				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			ri = new_recvinfo_for_arg(iseq, str, idPLUS, String, 0);
 			break;
 		      case idEqq:
 			/* optimize allocation: obj === "lit" */
-			ri = new_recvinfo_for_arg(iseq, str, BOP_EQQ,
-				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			ri = new_recvinfo_for_arg(iseq, str, idEqq, String, 0);
 			break;
 		    }
 		}
@@ -3213,18 +3207,18 @@ build_postexe_iseq(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *body)
     return Qnil;
 }
 
-static enum ruby_basic_operators
-opt_str_lit_recv_bop(ID mid)
+static enum ruby_optimized_method
+opt_str_lit_recv_om(ID mid)
 {
     switch (mid) {
-      case idEq: return BOP_EQ;
-      case idNeq: return BOP_NEQ;
-      case idPLUS: return BOP_PLUS;
-      case idMULT: return BOP_MULT;
-      case idMOD: return BOP_MOD;
-      case idEqq: return BOP_EQQ;
+      case idEq: return OM_idEq__String;
+      case idNeq: return OM_idNeq__String;
+      case idPLUS: return OM_idPLUS__String;
+      case idMULT: return OM_idMULT__String;
+      case idMOD: return OM_idMOD__String;
+      case idEqq: return OM_idEqq__String;
     }
-    return BOP_LAST_;
+    return OM_LAST_;
 }
 
 /**
@@ -4452,7 +4446,7 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 #endif
 	/* receiver */
 	if (type == NODE_CALL) {
-	    enum ruby_basic_operators bop;
+	    enum ruby_optimized_method om;
 	    /*
 	     * optimize:
 	     *   "yoda" == other -> opt_str_lit("yoda").send(:==, other)
@@ -4462,15 +4456,14 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	     *   "fmt" % args -> opt_str_lit("str").send(:%, other)
 	     */
 	    if (iseq->compile_data->option->peephole_optimization &&
-		((bop = opt_str_lit_recv_bop(mid)) != BOP_LAST_) &&
+		((om = opt_str_lit_recv_om(mid)) != OM_LAST_) &&
 		!private_recv_p(node) &&
 		node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
 		node->nd_args && nd_type(node->nd_args) == NODE_ARRAY &&
 		node->nd_args->nd_alen == 1)
 	    {
 		VALUE yoda = rb_fstring(node->nd_recv->nd_lit);
-		VALUE recv_info = new_recvinfo_for_put(iseq, yoda,
-					    bop, STRING_REDEFINED_OP_FLAG);
+		VALUE recv_info = new_recvinfo_for_put_(iseq, yoda, om);
 
 		node->nd_recv->nd_lit = yoda;
 		ADD_INSN1(recv, line, opt_str_lit, recv_info);
@@ -5376,9 +5369,7 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	    nd_type(node->nd_args->nd_head) == NODE_STR)
 	{
 	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
-	    VALUE recv_info = new_recvinfo_for_arg(iseq, str,
-					    BOP_ASET, HASH_REDEFINED_OP_FLAG,
-					    rb_cHash, 0);
+	    VALUE recv_info = new_recvinfo_for_arg(iseq, str, idASET, Hash, 0);
 
 	    node->nd_args->nd_head->nd_lit = str;
 	    if (!poped) {
diff --git a/defs/opt_method.def b/defs/opt_method.def
new file mode 100644
index 0000000..acc5e6b
--- /dev/null
+++ b/defs/opt_method.def
@@ -0,0 +1,49 @@
+# byte align the bitmap for now, maybe some arches do better with long or int
+# we may also use a larger size (in the unlikely case) we need more than
+# 7 optimized classes per mid.   Currently this caps us to 256 optimized
+# (mid, klass) combinations (tested with OM_SHIFT=4, giving us 64K)
+OM_SHIFT = 3
+OM_ALIGN = 1 << OM_SHIFT
+OM_ALIGN_MASK = ~(OM_ALIGN - 1)
+OPT_METHODS = [
+  %w(idPLUS Fixnum Float String Array),
+  %w(idMINUS Fixnum Float),
+  %w(idMULT Fixnum Float String),
+  %w(idDIV Fixnum Float),
+  %w(idMOD Fixnum Float String),
+  %w(idEq Fixnum Float String),
+  %w(idNeq Fixnum Float String),
+  # id, mask classes
+  [ 'idEqq', %w(Bignum Fixnum Float Symbol), *%w(String) ],
+  %w(idLT Fixnum Float),
+  %w(idLE Fixnum Float),
+  %w(idGT Fixnum Float),
+  %w(idGE Fixnum Float),
+  %w(idLTLT String Array),
+  %w(idAREF Array Hash),
+  %w(idASET Array Hash),
+  %w(idLength Array String Hash),
+  %w(idSize Array String Hash),
+  %w(idEmptyP Array String Hash),
+  %w(idSucc Fixnum String Time),
+  %w(idEqTilde Regexp String),
+  %w(idFreeze String),
+]
+
+# for checking optimized classes,
+# speeds up method definitions of non-core classes
+def opt_classes
+  rv = {}
+  OPT_METHODS.each do |(_, *classes)|
+    classes.flatten.each { |c| rv[c] = true }
+  end
+  rv
+end
+
+def om(mid, klass)
+  if Array === klass
+    "OM_#{mid}__#{klass.join('_')}"
+  else
+    "OM_#{mid}__#{klass}"
+  end
+end
diff --git a/insns.def b/insns.def
index f6740f8..e304338 100644
--- a/insns.def
+++ b/insns.def
@@ -367,36 +367,34 @@ opt_str_lit
     /*
      * recv_info:
      * 0 - str
-     * 1 - basic operator flag (BOP_*)
-     * 2 - redefined flag (*_REDEFINED_OP_FLAG)
+     * 1 - optimized method flag (OM_*)
      * optional:
-     * 3 - Class (optimized receiver class) or Symbol (method name)
-     * 4 - stack offset (Fixint), only present if [3] is a Class,
+     * 2 - Class (optimized receiver class) or Symbol (method name)
+     * 3 - stack offset (Fixint), only present if [2] is a Class,
      *     -1 stack offset means receiver is the frozen string literal itself
      */
     const VALUE *ri = RARRAY_CONST_PTR(recv_info);
     long len = RARRAY_LEN(recv_info);
-    enum ruby_basic_operators bop = FIX2INT(ri[1]);
-    int redef_flag = FIX2INT(ri[2]);
+    enum ruby_optimized_method om = FIX2INT(ri[1]);
 
     val = ri[0]; /* hopefully, this is the only val assignment we need */
-    if (len > 3) {
-	VALUE msym_or_class = ri[3];
+    if (len > 2) {
+	VALUE msym_or_class = ri[2];
 
 	/* check if the receiver is an on-stack object: */
 	if (!SYMBOL_P(msym_or_class)) {
-	    int n = FIX2INT(ri[4]);
+	    int n = FIX2INT(ri[3]);
 	    VALUE recv = n < 0 ? val : TOPN(n);
 
 	    if (SPECIAL_CONST_P(recv) ||
 		    RBASIC_CLASS(recv) != msym_or_class ||
-		    !BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+		    !rb_basic_op_unredefined_p(om)) {
 		/* bad, somebody redefined an optimized method, slow path: */
 		val = rb_str_resurrect(val);
 	    }
 	}
 	else { /* receiver is the string literal itself (e.g. "str".freeze) */
-	    if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+	    if (!rb_basic_op_unredefined_p(om)) {
 		/* bad, somebody redefined an optimized method, slow path: */
 		val = rb_str_resurrect(val);
 		val = rb_funcall(val, SYM2ID(msym_or_class), 0);
@@ -404,7 +402,7 @@ opt_str_lit
 	}
     }
     else { /* string lit is receiver, but there are args */
-	if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+	if (!rb_basic_op_unredefined_p(om)) {
 	    /* bad, somebody redefined an optimized method, slow path: */
 	    val = rb_str_resurrect(val);
 	}
@@ -1328,11 +1326,7 @@ opt_case_dispatch
       case T_FIXNUM:
       case T_BIGNUM:
       case T_STRING:
-	if (BASIC_OP_UNREDEFINED_P(BOP_EQQ,
-				   SYMBOL_REDEFINED_OP_FLAG |
-				   FIXNUM_REDEFINED_OP_FLAG |
-				   BIGNUM_REDEFINED_OP_FLAG |
-				   STRING_REDEFINED_OP_FLAG)) {
+	if (rb_basic_mask_unredefined_p(OM_idEqq__Bignum_Fixnum_Float_Symbol)) {
 	    st_data_t val;
 	    if (st_lookup(RHASH_TBL_RAW(hash), key, &val)) {
 		JUMP(FIX2INT((VALUE)val));
@@ -1360,8 +1354,7 @@ opt_plus
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_PLUS,FIXNUM_REDEFINED_OP_FLAG)) {
+    if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idPLUS, Fixnum)) {
 	/* fixnum + fixnum */
 #ifndef LONG_LONG_VALUE
 	val = (recv + (obj & (~1)));
@@ -1384,20 +1377,20 @@ opt_plus
 #endif
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idPLUS, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
-	    BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idPLUS, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
 	}
 	else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString &&
-		 BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idPLUS, String)) {
 	    val = rb_str_plus(recv, obj);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idPLUS, Array)) {
 	    val = rb_ary_plus(recv, obj);
 	}
 	else {
@@ -1424,7 +1417,7 @@ opt_minus
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_MINUS, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idMINUS, Fixnum)) {
 	long a, b, c;
 
 	a = FIX2LONG(recv);
@@ -1439,12 +1432,12 @@ opt_minus
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idMINUS, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idMINUS, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1472,7 +1465,7 @@ opt_mult
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_MULT, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idMULT, Fixnum)) {
 	long a, b;
 
 	a = FIX2LONG(recv);
@@ -1489,13 +1482,12 @@ opt_mult
             }
 	}
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMULT, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idMULT, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1521,8 +1513,7 @@ opt_div
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_DIV, FIXNUM_REDEFINED_OP_FLAG)) {
+    if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idDIV, Fixnum)) {
 	long x, y, div;
 
 	x = FIX2LONG(recv);
@@ -1552,13 +1543,12 @@ opt_div
 	}
 	val = LONG2NUM(div);
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idDIV, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idDIV, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1584,8 +1574,7 @@ opt_mod
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_MOD, FIXNUM_REDEFINED_OP_FLAG )) {
+    if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMOD, Fixnum )) {
 	long x, y;
 
 	x = FIX2LONG(recv);
@@ -1619,13 +1608,12 @@ opt_mod
 	    val = LONG2FIX(mod);
 	}
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMOD, Float)) {
 	val = DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
-	    BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idMOD, Float)) {
 	    val = DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)));
 	}
 	else {
@@ -1704,7 +1692,7 @@ opt_lt
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_LT, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idLT, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a < b) {
@@ -1715,13 +1703,13 @@ opt_lt
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idLT, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) < RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idLT, Float)) {
 	    val = double_cmp_lt(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1748,7 +1736,7 @@ opt_le
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_LE, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idLE, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a <= b) {
@@ -1759,7 +1747,7 @@ opt_le
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_LE, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idLE, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) <= RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
@@ -1783,7 +1771,7 @@ opt_gt
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_GT, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idGT, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a > b) {
@@ -1794,13 +1782,13 @@ opt_gt
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idGT, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) > RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idGT, Float)) {
 	    val = double_cmp_gt(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1827,7 +1815,7 @@ opt_ge
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_GE, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idGE, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a >= b) {
@@ -1837,8 +1825,7 @@ opt_ge
 	    val = Qfalse;
 	}
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_GE, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idGE, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) >= RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
@@ -1862,11 +1849,11 @@ opt_ltlt
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_LTLT, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idLTLT, String)) {
 	    val = rb_str_concat(recv, obj);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_LTLT, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idLTLT, Array)) {
 	    val = rb_ary_push(recv, obj);
 	}
 	else {
@@ -1893,10 +1880,10 @@ opt_aref
 (VALUE val)
 {
     if (!SPECIAL_CONST_P(recv)) {
-	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(BOP_AREF, ARRAY_REDEFINED_OP_FLAG) && FIXNUM_P(obj)) {
+	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(idAREF, Array) && FIXNUM_P(obj)) {
 	    val = rb_ary_entry(recv, FIX2LONG(obj));
 	}
-	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
+	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(idAREF, Hash)) {
 	    val = rb_hash_aref(recv, obj);
 	}
 	else {
@@ -1923,11 +1910,11 @@ opt_aset
 (VALUE val)
 {
     if (!SPECIAL_CONST_P(recv)) {
-	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(BOP_ASET, ARRAY_REDEFINED_OP_FLAG) && FIXNUM_P(obj)) {
+	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(idASET, Array) && FIXNUM_P(obj)) {
 	    rb_ary_store(recv, FIX2LONG(obj), set);
 	    val = set;
 	}
-	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
+	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(idASET, Hash)) {
 	    rb_hash_aset(recv, obj, set);
 	    val = set;
 	}
@@ -1957,15 +1944,15 @@ opt_length
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_LENGTH, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idLength, String)) {
 	    val = rb_str_length(recv);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_LENGTH, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idLength, Array)) {
 	    val = LONG2NUM(RARRAY_LEN(recv));
 	}
 	else if (RBASIC_CLASS(recv) == rb_cHash &&
-		 BASIC_OP_UNREDEFINED_P(BOP_LENGTH, HASH_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idLength, Hash)) {
 	    val = INT2FIX(RHASH_SIZE(recv));
 	}
 	else {
@@ -1992,15 +1979,15 @@ opt_size
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_SIZE, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idSize, String)) {
 	    val = rb_str_length(recv);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_SIZE, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idSize, Array)) {
 	    val = LONG2NUM(RARRAY_LEN(recv));
 	}
 	else if (RBASIC_CLASS(recv) == rb_cHash &&
-		 BASIC_OP_UNREDEFINED_P(BOP_SIZE, HASH_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idSize, Hash)) {
 	    val = INT2FIX(RHASH_SIZE(recv));
 	}
 	else {
@@ -2027,17 +2014,17 @@ opt_empty_p
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idEmptyP, String)) {
 	    if (RSTRING_LEN(recv) == 0) val = Qtrue;
 	    else val = Qfalse;
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idEmptyP, Array)) {
 	    if (RARRAY_LEN(recv) == 0) val = Qtrue;
 	    else val = Qfalse;
 	}
 	else if (RBASIC_CLASS(recv) == rb_cHash &&
-		 BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, HASH_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idEmptyP, Hash)) {
 	    if (RHASH_EMPTY_P(recv)) val = Qtrue;
 	    else val = Qfalse;
 	}
@@ -2065,7 +2052,7 @@ opt_succ
 {
     if (SPECIAL_CONST_P(recv)) {
 	if (FIXNUM_P(recv) &&
-	    BASIC_OP_UNREDEFINED_P(BOP_SUCC, FIXNUM_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idSucc, Fixnum)) {
 	    const VALUE obj = INT2FIX(1);
 	    /* fixnum + INT2FIX(1) */
 	    val = (recv + (obj & (~1)));
@@ -2080,11 +2067,11 @@ opt_succ
     }
     else {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_SUCC, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idSucc, String)) {
 	    val = rb_str_succ(recv);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cTime &&
-		 BASIC_OP_UNREDEFINED_P(BOP_SUCC, TIME_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idSucc, Time)) {
 	    val = rb_time_succ(recv);
 	}
 	else
@@ -2134,7 +2121,7 @@ opt_regexpmatch1
 (VALUE obj)
 (VALUE val)
 {
-    if (BASIC_OP_UNREDEFINED_P(BOP_MATCH, REGEXP_REDEFINED_OP_FLAG)) {
+    if (BASIC_OP_UNREDEFINED_P(idEqTilde, Regexp)) {
 	val = rb_reg_match(r, obj);
     }
     else {
@@ -2154,7 +2141,7 @@ opt_regexpmatch2
 (VALUE val)
 {
     if (CLASS_OF(obj2) == rb_cString &&
-	BASIC_OP_UNREDEFINED_P(BOP_MATCH, STRING_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idEqTilde, String)) {
 	val = rb_reg_match(obj1, obj2);
     }
     else {
diff --git a/template/opt_method.h.tmpl b/template/opt_method.h.tmpl
new file mode 100644
index 0000000..39c4043
--- /dev/null
+++ b/template/opt_method.h.tmpl
@@ -0,0 +1,71 @@
+/* DO NOT EDIT THIS FILE DIRECTLY: edit template/opt_method.h.tmpl instead */
+#ifndef RUBY_OPT_METHOD_H
+#define RUBY_OPT_METHOD_H
+<%
+defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def")
+eval(File.read(defs), binding, defs)
+%>
+typedef uint<%= OM_ALIGN %>_t rb_om_bitmap_t;
+
+enum ruby_optimized_method {
+<%
+opt_masks = {}
+n = 0
+OPT_METHODS.each do |(mid, *classes)|
+  classes.each do |klass|
+    if Array === klass
+      opt_masks[mid] = klass.dup
+      # we will align these in the second loop, below
+      next
+    end %>
+    <%= om(mid, klass) %> = <%= n += 1 %>,
+<%
+  end # classes.each
+end # OPT_METHODS.each
+
+# align multi-class bits so a single AND operation may
+# be byte-aligned and used to check an mid for up to 7 classes at once:
+opt_masks.each do |mid, classes|
+  # round up n to the next aligned byte slot
+  n = (n + OM_ALIGN) & OM_ALIGN_MASK
+
+  classes.each do |k|
+%>
+    <%= om(mid, k) %> = <%= n += 1 %>,
+<%=
+# we need this macro to generate shifts for the masks enums below:
+"#define #{om(mid, k)} (#{n})"
+%>
+<%
+  end # classes.each
+end # opt_masks.each
+if n >= ((1 << OM_ALIGN) - 1)
+  raise "OM_ALIGN needs to be raised to support more optimized methods"
+end
+%>
+    OM_LAST_ = <%= om_last = (n += 1) %>, /* for bitmap sizing */
+    /* special mask values below */
+<%
+# generate mask enums
+opt_masks.each do |mid, c|
+  # n.b.: negate masks to simplify the rb_opt_method_is_mask check:
+%>
+    <%= om(mid, c) %> = -(<%=
+      # pack into 16 bits so it may be a negative Fixnum
+      # 1) 8-bit offset (index into redefined_flag[])
+      # 2) OM_ALIGN-bit mask (8 or 16 bits)
+      sep = "|\n    "
+      "/* offset: */ ((#{om(mid, c[0])} / #{OM_ALIGN}) << #{OM_ALIGN}) " \
+      "#{sep} /* mask: */ (" +
+      c.map { |k| "(1U << (#{om(mid, k)} % #{OM_ALIGN}))" }.join(sep) + # mask
+      ')'
+  %>),
+<%
+end # opt_masks.each
+%>
+    OM_ALIGN_ = <%= OM_ALIGN %>,
+    OM_SIZE_ = <%= ((om_last + OM_ALIGN) & OM_ALIGN_MASK) / OM_ALIGN %>,
+    OM_GETMASK_ = (1 << OM_ALIGN_) - 1
+};
+
+#endif /* RUBY_OPT_METHOD_H */
diff --git a/template/opt_method.inc.tmpl b/template/opt_method.inc.tmpl
new file mode 100644
index 0000000..0501121
--- /dev/null
+++ b/template/opt_method.inc.tmpl
@@ -0,0 +1,49 @@
+/* DO NOT EDIT THIS FILE DIRECTLY: edit template/opt_method.inc.tmpl instead */
+<%
+defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def")
+eval(File.read(defs), binding, defs)
+%>
+
+static void
+add_opt_method(st_table *tbl, VALUE klass, ID mid,
+		enum ruby_optimized_method om)
+{
+    rb_method_entry_t *me = rb_method_entry_at(klass, mid);
+
+    if (me && me->def && me->def->type == VM_METHOD_TYPE_CFUNC) {
+	st_insert(tbl, (st_data_t)me, (st_data_t)om);
+    }
+    else if (mid != idNeq) {
+	rb_bug("undefined optimized method: %s", rb_id2name(mid));
+    }
+}
+
+static void
+vm_init_redefined_flags(void *tbl)
+{
+<%
+OPT_METHODS.each do |(mid, *classes)|
+  classes.each do |klass|
+    if Array === klass
+      klass.each do |k|
+%>
+    add_opt_method(tbl, rb_c<%= k %>, <%= mid %>, <%= om(mid, k) %>);
+<%
+      end # klass.each
+    else
+%>
+    add_opt_method(tbl, rb_c<%= klass %>, <%= mid %>, <%= om(mid, klass) %>);
+<%  end # !(Array === klass)
+  end # classes.each
+end # OPT_METHODS.each
+%>
+}
+
+static int
+vm_redefinition_check_flag(VALUE klass)
+{
+<% opt_classes.each_key do |klass| %>
+    if (klass == rb_c<%= klass %>) return 1;
+<% end %>
+    return 0;
+}
diff --git a/vm.c b/vm.c
index cc88926..35c4120 100644
--- a/vm.c
+++ b/vm.c
@@ -20,6 +20,7 @@
 #include "eval_intern.h"
 #include "probes.h"
 #include "probes_helper.h"
+#include "opt_method.inc"
 
 static inline VALUE *
 VM_EP_LEP(VALUE *ep)
@@ -1134,30 +1135,16 @@ rb_iter_break_value(VALUE val)
 
 static st_table *vm_opt_method_table = 0;
 
-static int
-vm_redefinition_check_flag(VALUE klass)
-{
-    if (klass == rb_cFixnum) return FIXNUM_REDEFINED_OP_FLAG;
-    if (klass == rb_cFloat)  return FLOAT_REDEFINED_OP_FLAG;
-    if (klass == rb_cString) return STRING_REDEFINED_OP_FLAG;
-    if (klass == rb_cArray)  return ARRAY_REDEFINED_OP_FLAG;
-    if (klass == rb_cHash)   return HASH_REDEFINED_OP_FLAG;
-    if (klass == rb_cBignum) return BIGNUM_REDEFINED_OP_FLAG;
-    if (klass == rb_cSymbol) return SYMBOL_REDEFINED_OP_FLAG;
-    if (klass == rb_cTime)   return TIME_REDEFINED_OP_FLAG;
-    if (klass == rb_cRegexp) return REGEXP_REDEFINED_OP_FLAG;
-    return 0;
-}
-
 static void
 rb_vm_check_redefinition_opt_method(const rb_method_entry_t *me, VALUE klass)
 {
-    st_data_t bop;
+    st_data_t om;
     if (!me->def || me->def->type == VM_METHOD_TYPE_CFUNC) {
-	if (st_lookup(vm_opt_method_table, (st_data_t)me, &bop)) {
-	    int flag = vm_redefinition_check_flag(klass);
+	if (st_lookup(vm_opt_method_table, (st_data_t)me, &om)) {
+	    unsigned int i = om / OM_ALIGN_;
+	    rb_om_bitmap_t mask = (rb_om_bitmap_t)(1U << (om % OM_ALIGN_));
 
-	    ruby_vm_redefined_flag[bop] |= flag;
+	    ruby_vm_redefined_flag[i] |= mask;
 	}
     }
 }
@@ -1184,51 +1171,11 @@ rb_vm_check_redefinition_by_prepend(VALUE klass)
 }
 
 static void
-add_opt_method(VALUE klass, ID mid, VALUE bop)
-{
-    rb_method_entry_t *me = rb_method_entry_at(klass, mid);
-
-    if (me && me->def &&
-	me->def->type == VM_METHOD_TYPE_CFUNC) {
-	st_insert(vm_opt_method_table, (st_data_t)me, (st_data_t)bop);
-    }
-    else {
-	rb_bug("undefined optimized method: %s", rb_id2name(mid));
-    }
-}
-
-static void
 vm_init_redefined_flag(void)
 {
-    ID mid;
-    VALUE bop;
-
     vm_opt_method_table = st_init_numtable();
 
-#define OP(mid_, bop_) (mid = id##mid_, bop = BOP_##bop_, ruby_vm_redefined_flag[bop] = 0)
-#define C(k) add_opt_method(rb_c##k, mid, bop)
-    OP(PLUS, PLUS), (C(Fixnum), C(Float), C(String), C(Array));
-    OP(MINUS, MINUS), (C(Fixnum), C(Float));
-    OP(MULT, MULT), (C(Fixnum), C(Float));
-    OP(DIV, DIV), (C(Fixnum), C(Float));
-    OP(MOD, MOD), (C(Fixnum), C(Float));
-    OP(Eq, EQ), (C(Fixnum), C(Float), C(String));
-    OP(Eqq, EQQ), (C(Fixnum), C(Bignum), C(Float), C(Symbol), C(String));
-    OP(LT, LT), (C(Fixnum), C(Float));
-    OP(LE, LE), (C(Fixnum), C(Float));
-    OP(GT, GT), (C(Fixnum), C(Float));
-    OP(GE, GE), (C(Fixnum), C(Float));
-    OP(LTLT, LTLT), (C(String), C(Array));
-    OP(AREF, AREF), (C(Array), C(Hash));
-    OP(ASET, ASET), (C(Array), C(Hash));
-    OP(Length, LENGTH), (C(Array), C(String), C(Hash));
-    OP(Size, SIZE), (C(Array), C(String), C(Hash));
-    OP(EmptyP, EMPTY_P), (C(Array), C(String), C(Hash));
-    OP(Succ, SUCC), (C(Fixnum), C(String), C(Time));
-    OP(EqTilde, MATCH), (C(Regexp), C(String));
-    OP(Freeze, FREEZE), (C(String));
-#undef C
-#undef OP
+    vm_init_redefined_flags(vm_opt_method_table); /* opt_method.inc.tmpl */
 }
 
 /* for vm development */
diff --git a/vm_core.h b/vm_core.h
index d4db5d9..3ae2d4b 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -24,6 +24,7 @@
 #include "method.h"
 #include "ruby_atomic.h"
 #include "ccan/list/list.h"
+#include "opt_method.h"
 
 #include "ruby/thread_native.h"
 #if   defined(_WIN32)
@@ -320,33 +321,6 @@ enum ruby_special_exceptions {
     ruby_special_error_count
 };
 
-enum ruby_basic_operators {
-    BOP_PLUS,
-    BOP_MINUS,
-    BOP_MULT,
-    BOP_DIV,
-    BOP_MOD,
-    BOP_EQ,
-    BOP_EQQ,
-    BOP_LT,
-    BOP_LE,
-    BOP_LTLT,
-    BOP_AREF,
-    BOP_ASET,
-    BOP_LENGTH,
-    BOP_SIZE,
-    BOP_EMPTY_P,
-    BOP_SUCC,
-    BOP_GT,
-    BOP_GE,
-    BOP_NOT,
-    BOP_NEQ,
-    BOP_MATCH,
-    BOP_FREEZE,
-
-    BOP_LAST_
-};
-
 #define GetVMPtr(obj, ptr) \
   GetCoreDataFromValue((obj), rb_vm_t, (ptr))
 
@@ -441,7 +415,7 @@ typedef struct rb_vm_struct {
 	size_t fiber_machine_stack_size;
     } default_params;
 
-    short redefined_flag[BOP_LAST_];
+    rb_om_bitmap_t redefined_flag[OM_SIZE_];
 } rb_vm_t;
 
 /* default values */
@@ -458,18 +432,8 @@ typedef struct rb_vm_struct {
 #define RUBY_VM_FIBER_MACHINE_STACK_SIZE      (  64 * 1024 * sizeof(VALUE)) /*  256 KB or  512 KB */
 #define RUBY_VM_FIBER_MACHINE_STACK_SIZE_MIN  (  16 * 1024 * sizeof(VALUE)) /*   64 KB or  128 KB */
 
-/* optimize insn */
-#define FIXNUM_REDEFINED_OP_FLAG (1 << 0)
-#define FLOAT_REDEFINED_OP_FLAG  (1 << 1)
-#define STRING_REDEFINED_OP_FLAG (1 << 2)
-#define ARRAY_REDEFINED_OP_FLAG  (1 << 3)
-#define HASH_REDEFINED_OP_FLAG   (1 << 4)
-#define BIGNUM_REDEFINED_OP_FLAG (1 << 5)
-#define SYMBOL_REDEFINED_OP_FLAG (1 << 6)
-#define TIME_REDEFINED_OP_FLAG   (1 << 7)
-#define REGEXP_REDEFINED_OP_FLAG (1 << 8)
-
-#define BASIC_OP_UNREDEFINED_P(op, klass) (LIKELY((GET_VM()->redefined_flag[(op)]&(klass)) == 0))
+#define BASIC_OP_UNREDEFINED_P(mid, klass) \
+	rb_basic_op_unredefined_p(OM_##mid##__##klass)
 
 #ifndef VM_DEBUG_BP_CHECK
 #define VM_DEBUG_BP_CHECK 0
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 05ed3c6..2aedb46 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -872,17 +872,17 @@ VALUE
 opt_eq_func(VALUE recv, VALUE obj, CALL_INFO ci)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_EQ, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idEq, Fixnum)) {
 	return (recv == obj) ? Qtrue : Qfalse;
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_EQ, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idEq, Float)) {
 	return (recv == obj) ? Qtrue : Qfalse;
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat &&
 	    RBASIC_CLASS(obj) == rb_cFloat &&
-	    BASIC_OP_UNREDEFINED_P(BOP_EQ, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idEq, Float)) {
 	    double a = RFLOAT_VALUE(recv);
 	    double b = RFLOAT_VALUE(obj);
 
@@ -893,7 +893,7 @@ opt_eq_func(VALUE recv, VALUE obj, CALL_INFO ci)
 	}
 	else if (RBASIC_CLASS(recv) == rb_cString &&
 		 RBASIC_CLASS(obj) == rb_cString &&
-		 BASIC_OP_UNREDEFINED_P(BOP_EQ, STRING_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idEq, String)) {
 	    return rb_str_equal(recv, obj);
 	}
     }
diff --git a/vm_insnhelper.h b/vm_insnhelper.h
index 31f8ffc..a4290ee 100644
--- a/vm_insnhelper.h
+++ b/vm_insnhelper.h
@@ -229,5 +229,28 @@ enum vm_regan_acttype {
 static VALUE make_no_method_exception(VALUE exc, const char *format,
 				      VALUE obj, int argc, const VALUE *argv);
 
-
+static inline int
+rb_basic_op_unredefined_p(enum ruby_optimized_method om)
+{
+    unsigned int i = om / OM_ALIGN_;
+    rb_om_bitmap_t mask = (rb_om_bitmap_t)(1U << (om % OM_ALIGN_));
+
+    return LIKELY((GET_VM()->redefined_flag[i] & mask) == 0);
+}
+
+static inline int
+rb_basic_mask_unredefined_p(enum ruby_optimized_method om)
+{
+    unsigned int uom = (unsigned int)-om;
+    unsigned int offset = 0xffU & (uom >> OM_ALIGN_);
+    rb_om_bitmap_t mask = (rb_om_bitmap_t)(OM_GETMASK_ & uom);
+
+    return LIKELY((GET_VM()->redefined_flag[offset] & mask) == 0);
+}
+
+static inline int
+rb_opt_method_is_mask(enum ruby_optimized_method om)
+{
+    return !!((int)om < 0);
+}
 #endif /* RUBY_INSNHELPER_H */
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 07/13] fix mismerge
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
                   ` (4 preceding siblings ...)
  2014-10-17  6:19 ` [PATCH 06/13] vm: automatically define optimized method enums Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 08/13] optimize string allocations for sub/gsub/tr/tr_s(!) Eric Wong
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

---
 test/ruby/test_string.rb | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index d3357b0..245a043 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2306,13 +2306,6 @@ class TestString < Test::Unit::TestCase
       assert_equal before + nr, GC.stat(:total_allocated_objects)
     end
   end
-end
-
-class TestString2 < TestString
-  def initialize(*args)
-    super
-    @cls = S2
-  end
 
   def test_opt_str_lit
     assert_separately([], <<-RUBY)
@@ -2396,7 +2389,7 @@ class TestString2 < TestString
       before = GC.stat(:total_allocated_objects)
       nr.times { res << (recv == "constant") } # opt_streq1
       nr.times { res << ("constant" == recv) } # opt_streq2
-      nr.times { res << ("something " != recv) } # 1st pass peephole
+      nr.times { res << ("something" != recv) } # 1st pass peephole
       nr.times { res << ("constant" == recv) } # opt_streq2
       nr.times { res << ("constant" === recv) } # opt_streqq2
       nr.times { res << (recv != "something") }  # 2nd pass peephole
@@ -2439,3 +2432,10 @@ class TestString2 < TestString
     end
   end
 end
+
+class TestString2 < TestString
+  def initialize(*args)
+    super
+    @cls = S2
+  end
+end
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 08/13] optimize string allocations for sub/gsub/tr/tr_s(!)
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
                   ` (5 preceding siblings ...)
  2014-10-17  6:19 ` [PATCH 07/13] fix mismerge Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 09/13] compile.c (opt_str_lit_1): hoist out of iseq_peephole_optimize Eric Wong
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

Improvements across the board (I haven't tested each method yet,
but the code changes are DRY enough for me to trust them).
-----------------------------------------------------------
raw data:
[["loop_whileloop2",
  [[0.09299727622419596, 0.09182057436555624, 0.09192507807165384],
   [0.09175324533134699, 0.0917150853201747, 0.09169120714068413]]],
 ["vm2_gsub_bang_lit",
  [[1.28092535212636, 1.212415685877204, 1.2110721664503217],
   [0.6577691622078419, 0.6574153359979391, 0.657793253660202]]],
 ["vm2_gsub_bang_re",
  [[1.5643755476921797, 1.5549067426472902, 1.5551356291398406],
   [1.2313632098957896, 1.2304210113361478, 1.2378935469314456]]],
 ["vm2_gsub_re",
  [[1.7676676837727427, 1.8015323374420404, 1.7688637850806117],
   [1.54728907905519, 1.530972053296864, 1.5956763122230768]]]]

Elapsed time: 24.618900503 (sec)
-----------------------------------------------------------
benchmark results:
minimum results in each 3 measurements.
Execution time (sec)
name	trunk	built
loop_whileloop2	0.092	0.092
vm2_gsub_bang_lit*	1.119	0.566
vm2_gsub_bang_re*	1.463	1.139
vm2_gsub_re*	1.676	1.439

Speedup ratio: compare with the result of `trunk' (greater is better)
name	built
loop_whileloop2	1.001
vm2_gsub_bang_lit*	1.978
vm2_gsub_bang_re*	1.285
vm2_gsub_re*	1.164

-----------------------------------------------------------
raw data:

[["loop_whileloop2",
  [[0.10830817837268114, 0.09708951227366924, 0.0952271893620491],
   [0.09657043684273958, 0.09450766257941723, 0.0936131589114666]]],
 ["vm2_tr_bang",
  [[2.8116708220914006, 2.6677203606814146, 2.7375484481453896],
   [1.6796088377013803, 1.6209765998646617, 1.7005833145231009]]]]

Elapsed time: 13.805532977 (sec)
-----------------------------------------------------------
benchmark results:
minimum results in each 3 measurements.
Execution time (sec)
name	trunk	built
loop_whileloop2	0.095	0.094
vm2_tr_bang*	2.572	1.527

Speedup ratio: compare with the result of `trunk' (greater is better)
name	built
loop_whileloop2	1.017
vm2_tr_bang*	1.684
---
 benchmark/bm_vm2_gsub_bang_lit.rb |  6 +++++
 benchmark/bm_vm2_gsub_bang_re.rb  |  6 +++++
 benchmark/bm_vm2_gsub_re.rb       |  6 +++++
 benchmark/bm_vm2_tr_bang.rb       |  7 ++++++
 compile.c                         | 52 +++++++++++++++++++++++++++++++++++++++
 defs/id.def                       |  9 +++++++
 defs/opt_method.def               |  8 ++++++
 test/ruby/test_string.rb          | 34 +++++++++++++++++++++++++
 8 files changed, 128 insertions(+)
 create mode 100644 benchmark/bm_vm2_gsub_bang_lit.rb
 create mode 100644 benchmark/bm_vm2_gsub_bang_re.rb
 create mode 100644 benchmark/bm_vm2_gsub_re.rb
 create mode 100644 benchmark/bm_vm2_tr_bang.rb

diff --git a/benchmark/bm_vm2_gsub_bang_lit.rb b/benchmark/bm_vm2_gsub_bang_lit.rb
new file mode 100644
index 0000000..9251fb1
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_bang_lit.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.gsub!("nomatch", "")
+end
diff --git a/benchmark/bm_vm2_gsub_bang_re.rb b/benchmark/bm_vm2_gsub_bang_re.rb
new file mode 100644
index 0000000..e5fc9ea
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_bang_re.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.gsub!(/a/, "")
+end
diff --git a/benchmark/bm_vm2_gsub_re.rb b/benchmark/bm_vm2_gsub_re.rb
new file mode 100644
index 0000000..606f247
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_re.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.gsub(/a/, "")
+end
diff --git a/benchmark/bm_vm2_tr_bang.rb b/benchmark/bm_vm2_tr_bang.rb
new file mode 100644
index 0000000..8065a65
--- /dev/null
+++ b/benchmark/bm_vm2_tr_bang.rb
@@ -0,0 +1,7 @@
+i = 0
+str = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.tr!("a", "A")
+  str.tr!("A", "a")
+end
diff --git a/compile.c b/compile.c
index 7d6771b..205ff6a 100644
--- a/compile.c
+++ b/compile.c
@@ -1745,6 +1745,54 @@ new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str,
     return ri;
 }
 
+/*
+ * optimize common calls which take two string literals:
+ *   foo.sub(/../, "to")
+ *   foo.sub!(/../, "to")
+ *   foo.gsub(/../, "to")
+ *   foo.gsub!(/../, "to")
+ *   foo.tr(/../, "to")
+ *   foo.tr!(/../, "to")
+ *   foo.tr_s(/../, "to")
+ *   foo.tr_s!(/../, "to")
+ */
+static VALUE
+opt_str_lit_2(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
+{
+    INSN *piobj;
+    enum ruby_optimized_method om = OM_LAST_;
+
+    switch (ci->mid) {
+#define C(mid) case mid: om = OM_##mid##__String; break
+      C(idSub);
+      C(idSub_bang);
+      C(idGsub);
+      C(idGsub_bang);
+      C(idTr);
+      C(idTr_bang);
+      C(idTr_s);
+      C(idTr_s_bang);
+#undef C
+      default: return Qfalse;
+    }
+
+    /*
+     * previous arg may be a string literal, too:
+     *   foo.gsub!("from", "to")
+     *   foo.tr!("from", "to")
+     *   ..
+     */
+    piobj = (INSN *)get_prev_insn(list);
+    if (piobj && piobj->insn_id == BIN(putstring)) {
+	VALUE pstr = piobj->operands[0];
+	VALUE pri = new_recvinfo_for_arg_(iseq, pstr, om, rb_cString, 0);
+	piobj->operands[0] = pri;
+	piobj->insn_id = BIN(opt_str_lit);
+    }
+
+    return new_recvinfo_for_arg_(iseq, str, om, rb_cString, 1);
+}
+
 static int
 iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcallopt)
 {
@@ -1921,6 +1969,10 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 			ri = new_recvinfo_for_arg(iseq, str, idEqq, String, 0);
 			break;
 		    }
+		    break;
+		  case 2:
+		    ri = opt_str_lit_2(iseq, str, ci, (INSN *)list);
+		    break;
 		}
 		if (ri != Qfalse) {
 		    iobj->insn_id = BIN(opt_str_lit);
diff --git a/defs/id.def b/defs/id.def
index f7fffbd..21aff93 100644
--- a/defs/id.def
+++ b/defs/id.def
@@ -57,6 +57,14 @@ firstline, predefined = __LINE__+1, %[\
   core#hash_merge_ary
   core#hash_merge_ptr
   core#hash_merge_kwd
+  gsub
+  gsub!
+  sub
+  sub!
+  tr
+  tr!
+  tr_s
+  tr_s!
 ]
 
 class KeywordError < RuntimeError
@@ -83,6 +91,7 @@ predefined.split(/^/).each_with_index do |line, num|
     token = "_#{token.gsub(/\W+/, '_')}"
   else
     token = token.sub(/\?/, 'P').sub(/\A[a-z]/) {$&.upcase}
+    token.sub!(/!\z/, "_bang")
     token.sub!(/\A\$/, "_G_")
     token.sub!(/\A@@/, "_C_")
     token.sub!(/\A@/, "_I_")
diff --git a/defs/opt_method.def b/defs/opt_method.def
index acc5e6b..e96cc9b 100644
--- a/defs/opt_method.def
+++ b/defs/opt_method.def
@@ -28,6 +28,14 @@ OPT_METHODS = [
   %w(idSucc Fixnum String Time),
   %w(idEqTilde Regexp String),
   %w(idFreeze String),
+  %w(idGsub String),
+  %w(idGsub_bang String),
+  %w(idSub String),
+  %w(idSub_bang String),
+  %w(idTr String),
+  %w(idTr_bang String),
+  %w(idTr_s String),
+  %w(idTr_s_bang String),
 ]
 
 # for checking optimized classes,
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 245a043..8d46764 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2431,6 +2431,40 @@ class TestString < Test::Unit::TestCase
       assert_equal [ "aaa" ], res.uniq!
     end
   end
+
+  def assert_no_new_allocations(mesg = "", adjust = 0)
+    before = GC.stat(:total_allocated_objects)
+    yield
+    after = GC.stat(:total_allocated_objects)
+    assert_equal before, after - adjust, mesg
+  end
+
+  def test_opt_str_lit_gsub
+    return if @cls != String
+    require_compile_option(:peephole_optimization)
+    foo = "foo"
+    re = /nomatch/
+    foo.gsub!(re, "00") # compile regexp
+    n = 3
+
+    assert_no_new_allocations("gsub var regexp") do
+      n.times { foo.gsub!(re, "00") }
+    end
+
+    # compiles re once:
+    assert_no_new_allocations("gsub lit regexp", 1) do
+      n.times { foo.gsub!(/nomatch/, "00") }
+    end
+
+    assert_no_new_allocations("gsub literal string") do
+      n.times { foo.gsub!("nomatch", "00") }
+    end
+
+    ary = [ [ re ] ]
+    assert_no_new_allocations("bigger stack") do
+      n.times { foo.gsub!(ary[0][0], "00") }
+    end
+  end
 end
 
 class TestString2 < TestString
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 09/13] compile.c (opt_str_lit_1): hoist out of iseq_peephole_optimize
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
                   ` (6 preceding siblings ...)
  2014-10-17  6:19 ` [PATCH 08/13] optimize string allocations for sub/gsub/tr/tr_s(!) Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 10/13] test/ruby/test_string.rb: cleanup allocation tests Eric Wong
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

---
 compile.c | 67 ++++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/compile.c b/compile.c
index 205ff6a..f1123cc 100644
--- a/compile.c
+++ b/compile.c
@@ -1746,21 +1746,51 @@ new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str,
 }
 
 /*
+ * optimize allocation:
+ *   hash["lit"] # hash lookups
+ *   str == "lit"
+ *   str != "lit"
+ *   str << "lit"
+ *   str + "lit"
+ *   str === "lit"
+ */
+static VALUE
+opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
+{
+    enum ruby_optimized_method om;
+    VALUE c;
+
+    switch (ci->mid) {
+#define C(mid,klass) case mid: om = OM_##mid##__##klass; c = rb_c##klass; break
+      C(idAREF, Hash);
+      C(idEq, String);
+      C(idNeq, String);
+      C(idLTLT, String);
+      C(idPLUS, String);
+      C(idEqq, String);
+#undef C
+      default: return Qfalse;
+    }
+
+    return new_recvinfo_for_arg_(iseq, str, om, c, 0);
+}
+
+/*
  * optimize common calls which take two string literals:
  *   foo.sub(/../, "to")
  *   foo.sub!(/../, "to")
  *   foo.gsub(/../, "to")
  *   foo.gsub!(/../, "to")
- *   foo.tr(/../, "to")
- *   foo.tr!(/../, "to")
- *   foo.tr_s(/../, "to")
- *   foo.tr_s!(/../, "to")
+ *   foo.tr("from", "to")
+ *   foo.tr!("from", "to")
+ *   foo.tr_s("from", "to")
+ *   foo.tr_s!("from", "to")
  */
 static VALUE
 opt_str_lit_2(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
 {
     INSN *piobj;
-    enum ruby_optimized_method om = OM_LAST_;
+    enum ruby_optimized_method om;
 
     switch (ci->mid) {
 #define C(mid) case mid: om = OM_##mid##__String; break
@@ -1943,32 +1973,7 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 		    }
 		    break;
 		  case 1:
-		    switch (ci->mid) {
-		      case idAREF:
-			/* optimize allocation: obj["lit"] */
-			ri = new_recvinfo_for_arg(iseq, str, idAREF, Hash, 0);
-			break;
-		      case idEq:
-			/* optimize allocation: obj == "lit" */
-			ri = new_recvinfo_for_arg(iseq, str, idEq, String, 0);
-			break;
-		      case idNeq:
-			/* optimize allocation: obj != "lit" */
-			ri = new_recvinfo_for_arg(iseq, str, idNeq, String, 0);
-			break;
-		      case idLTLT:
-			/* optimize allocation: obj << "lit" */
-			ri = new_recvinfo_for_arg(iseq, str, idLTLT, String, 0);
-			break;
-		      case idPLUS:
-			/* optimize allocation: obj + "lit" */
-			ri = new_recvinfo_for_arg(iseq, str, idPLUS, String, 0);
-			break;
-		      case idEqq:
-			/* optimize allocation: obj === "lit" */
-			ri = new_recvinfo_for_arg(iseq, str, idEqq, String, 0);
-			break;
-		    }
+		    ri = opt_str_lit_1(iseq, str, ci, (INSN *)list);
 		    break;
 		  case 2:
 		    ri = opt_str_lit_2(iseq, str, ci, (INSN *)list);
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 10/13] test/ruby/test_string.rb: cleanup allocation tests
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
                   ` (7 preceding siblings ...)
  2014-10-17  6:19 ` [PATCH 09/13] compile.c (opt_str_lit_1): hoist out of iseq_peephole_optimize Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 11/13] test_string: cleanup tests Eric Wong
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

---
 test/ruby/test_string.rb | 76 ++++++++++++++++++++++++------------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 8d46764..3d82bb3 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1910,9 +1910,9 @@ class TestString < Test::Unit::TestCase
 
   def test_literal_freeze
     require_compile_option(:peephole_optimization)
-    before = GC.stat(:total_allocated_objects)
-    5.times { "".freeze }
-    assert_equal before, GC.stat(:total_allocated_objects)
+    assert_no_new_allocations do
+      5.times { "".freeze }
+    end
   end
 
   class S2 < String
@@ -2296,14 +2296,14 @@ class TestString < Test::Unit::TestCase
     if @cls == String
       nr = 10
       recv = ""
-      before = GC.stat(:total_allocated_objects)
-      nr.times { recv << "constant" }
-      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_no_new_allocations do
+        nr.times { recv << "constant" }
+      end
       assert_equal "constant" * nr, recv
 
-      before = GC.stat(:total_allocated_objects)
-      nr.times { "recv" << "constant" }
-      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_no_new_allocations("'lit' << 'lit' (LTLT)", nr) do
+        nr.times { "recv" << "constant" }
+      end
     end
   end
 
@@ -2386,48 +2386,48 @@ class TestString < Test::Unit::TestCase
 
       recv = "something"
       res = []
-      before = GC.stat(:total_allocated_objects)
-      nr.times { res << (recv == "constant") } # opt_streq1
-      nr.times { res << ("constant" == recv) } # opt_streq2
-      nr.times { res << ("something" != recv) } # 1st pass peephole
-      nr.times { res << ("constant" == recv) } # opt_streq2
-      nr.times { res << ("constant" === recv) } # opt_streqq2
-      nr.times { res << (recv != "something") }  # 2nd pass peephole
-      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_no_new_allocations("false comparisons") do
+        nr.times { res << (recv == "constant") } # opt_streq1
+        nr.times { res << ("constant" == recv) } # opt_streq2
+        nr.times { res << ("something" != recv) } # 1st pass peephole
+        nr.times { res << ("constant" == recv) } # opt_streq2
+        nr.times { res << ("constant" === recv) } # opt_streqq2
+        nr.times { res << (recv != "something") }  # 2nd pass peephole
+      end
       assert_equal [ false ], res.uniq!
 
       res.clear
-      before = GC.stat(:total_allocated_objects)
-      nr.times { res << (recv == "something") } # opt_streq1
-      nr.times { res << ("something" == recv) } # opt_streq2
-      nr.times { res << ("something" === recv) } # opt_streqq2
-      nr.times { res << (recv === "something") } # opt_streqq2
-      nr.times { res << ("constant" != recv) } # 1st pass peephole
-      nr.times { res << (recv != "constant") } # 2nd pass peephole
-      nr.times { res << ("a" != "b") } # 1st pass peephole
-      nr.times { res << ("a" == "a") } # 1st pass peephole
-      nr.times { res << ("".size == 0) } # 2nd pass peephole
-      nr.times { res << ("".length == 0) } # 2nd pass peephole
-      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_no_new_allocations("true comparisons") do
+        nr.times { res << (recv == "something") } # opt_streq1
+        nr.times { res << ("something" == recv) } # opt_streq2
+        nr.times { res << ("something" === recv) } # opt_streqq2
+        nr.times { res << (recv === "something") } # opt_streqq2
+        nr.times { res << ("constant" != recv) } # 1st pass peephole
+        nr.times { res << (recv != "constant") } # 2nd pass peephole
+        nr.times { res << ("a" != "b") } # 1st pass peephole
+        nr.times { res << ("a" == "a") } # 1st pass peephole
+        nr.times { res << ("".size == 0) } # 2nd pass peephole
+        nr.times { res << ("".length == 0) } # 2nd pass peephole
+      end
       assert_equal [ true ], res.uniq!
 
       # :+ optimizations
       res.clear
-      before = GC.stat(:total_allocated_objects)
-      nr.times { res << ("foo" + recv) }
-      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_no_new_allocations("'str' + (PLUS)", nr) do
+        nr.times { res << ("foo" + recv) }
+      end
       assert_equal [ "foosomething" ], res.uniq!
 
       res.clear
-      before = GC.stat(:total_allocated_objects)
-      nr.times { res << (recv + "foo") }
-      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_no_new_allocations("+ 'str' (PLUS)", nr) do
+        nr.times { res << (recv + "foo") }
+      end
       assert_equal [ "somethingfoo" ], res.uniq!
 
       res.clear
-      before = GC.stat(:total_allocated_objects)
-      nr.times { res << ('a' * 3) }
-      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_no_new_allocations("'str' * (MULT)", nr) do
+        nr.times { res << ('a' * 3) }
+      end
       assert_equal [ "aaa" ], res.uniq!
     end
   end
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 11/13] test_string: cleanup tests
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
                   ` (8 preceding siblings ...)
  2014-10-17  6:19 ` [PATCH 10/13] test/ruby/test_string.rb: cleanup allocation tests Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 12/13] opt_method.inc.tmpl: flatten Eric Wong
  2014-10-17  6:19 ` [PATCH 13/13] opt_str_lit: switch to type mask for raw class comparisons Eric Wong
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

---
 test/ruby/test_string.rb | 63 ------------------------------------------------
 1 file changed, 63 deletions(-)

diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 3d82bb3..798984d 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2281,18 +2281,6 @@ class TestString < Test::Unit::TestCase
   # enable only when string size range is smaller than memory space
 
   def test_opt_strcat_with
-    assert_separately([], <<-RUBY)
-      class String
-        undef <<
-        def <<(str)
-          "overridden"
-        end
-      end
-      assert_equal("overridden", "" << "foo")
-      foo = "foo"
-      assert_equal("overridden", foo << "bar")
-    RUBY
-
     if @cls == String
       nr = 10
       recv = ""
@@ -2310,57 +2298,6 @@ class TestString < Test::Unit::TestCase
   def test_opt_str_lit
     assert_separately([], <<-RUBY)
       class String
-        undef ==
-        def ==(str)
-          :TROO
-        end
-      end
-      foo = "foo"
-      assert_equal(:TROO, (foo == "foo"), 'string == "peephole 2nd pass"')
-      assert_equal(:TROO, ("foo" == foo), '"yoda 1st pass" == string')
-    RUBY
-
-    assert_separately([], <<-RUBY)
-      class String
-        undef !=
-        def !=(str)
-          :NOT
-        end
-      end
-      foo = ""
-      assert_equal(:NOT, ("foo" != foo), '"yoda 1st pass" != string')
-      assert_equal(:NOT, (foo != "foo"), 'string != "peephole 2nd pass"')
-    RUBY
-
-    assert_separately([], <<-RUBY)
-      class String
-        undef size
-        undef length
-        def size
-          42
-        end
-        def length
-          42
-        end
-      end
-      assert_equal(42, "".size, 'lit string size')
-      assert_equal(42, "".length, 'lit string size')
-    RUBY
-
-    assert_separately([], <<-RUBY)
-      class String
-        undef +
-        def +(other)
-          :plus
-        end
-      end
-      foo = "a"
-      assert_equal(:plus, "" + foo, 'lit plus')
-      assert_equal(:plus, foo + "", 'plus lit')
-    RUBY
-
-    assert_separately([], <<-RUBY)
-      class String
         undef *
         def *(other)
           :mult
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 12/13] opt_method.inc.tmpl: flatten
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
                   ` (9 preceding siblings ...)
  2014-10-17  6:19 ` [PATCH 11/13] test_string: cleanup tests Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  2014-10-17  6:19 ` [PATCH 13/13] opt_str_lit: switch to type mask for raw class comparisons Eric Wong
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

---
 template/opt_method.inc.tmpl | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/template/opt_method.inc.tmpl b/template/opt_method.inc.tmpl
index 0501121..acbdc1a 100644
--- a/template/opt_method.inc.tmpl
+++ b/template/opt_method.inc.tmpl
@@ -23,17 +23,10 @@ vm_init_redefined_flags(void *tbl)
 {
 <%
 OPT_METHODS.each do |(mid, *classes)|
-  classes.each do |klass|
-    if Array === klass
-      klass.each do |k|
-%>
-    add_opt_method(tbl, rb_c<%= k %>, <%= mid %>, <%= om(mid, k) %>);
-<%
-      end # klass.each
-    else
+  classes.flatten.each do |klass|
 %>
     add_opt_method(tbl, rb_c<%= klass %>, <%= mid %>, <%= om(mid, klass) %>);
-<%  end # !(Array === klass)
+<%
   end # classes.each
 end # OPT_METHODS.each
 %>
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 13/13] opt_str_lit: switch to type mask for raw class comparisons
  2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
                   ` (10 preceding siblings ...)
  2014-10-17  6:19 ` [PATCH 12/13] opt_method.inc.tmpl: flatten Eric Wong
@ 2014-10-17  6:19 ` Eric Wong
  11 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-10-17  6:19 UTC (permalink / raw)
  To: spew

This allows optimizations for method names common to multiple
classes.
---
 compile.c                  | 22 ++++++++++++++--------
 defs/opt_method.def        |  4 ++++
 insns.def                  | 19 +++++++++++++------
 template/opt_method.h.tmpl | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/compile.c b/compile.c
index f1123cc..d22185b 100644
--- a/compile.c
+++ b/compile.c
@@ -1731,13 +1731,15 @@ new_recvinfo_for_call_(rb_iseq_t *iseq, VALUE str,
 }
 
 #define new_recvinfo_for_arg(iseq,str,mid,klass,off) \
-    new_recvinfo_for_arg_((iseq),(str),OM_##mid##__##klass,(rb_c##klass),(off))
+    new_recvinfo_for_arg_((iseq),(str),(OM_##mid##__##klass),\
+                          (OM_TMASK_##klass),(off))
 static VALUE
 new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str,
-		enum ruby_optimized_method om, VALUE klass, int recv_off)
+		enum ruby_optimized_method om,
+		enum ruby_optimized_method tmask, int recv_off)
 {
     VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(om),
-				    klass, INT2FIX(recv_off));
+				    INT2FIX(tmask), INT2FIX(recv_off));
 
     hide_obj(ri);
     iseq_add_mark_object(iseq, ri);
@@ -1758,10 +1760,14 @@ static VALUE
 opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
 {
     enum ruby_optimized_method om;
-    VALUE c;
+    enum ruby_optimized_method tmask;
 
     switch (ci->mid) {
-#define C(mid,klass) case mid: om = OM_##mid##__##klass; c = rb_c##klass; break
+#define C(mid,klass) \
+  case mid: \
+    om = OM_##mid##__##klass; \
+    tmask = OM_TMASK_##klass; \
+    break
       C(idAREF, Hash);
       C(idEq, String);
       C(idNeq, String);
@@ -1772,7 +1778,7 @@ opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
       default: return Qfalse;
     }
 
-    return new_recvinfo_for_arg_(iseq, str, om, c, 0);
+    return new_recvinfo_for_arg_(iseq, str, om, tmask, 0);
 }
 
 /*
@@ -1815,12 +1821,12 @@ opt_str_lit_2(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
     piobj = (INSN *)get_prev_insn(list);
     if (piobj && piobj->insn_id == BIN(putstring)) {
 	VALUE pstr = piobj->operands[0];
-	VALUE pri = new_recvinfo_for_arg_(iseq, pstr, om, rb_cString, 0);
+	VALUE pri = new_recvinfo_for_arg_(iseq, pstr, om, OM_TMASK_String, 0);
 	piobj->operands[0] = pri;
 	piobj->insn_id = BIN(opt_str_lit);
     }
 
-    return new_recvinfo_for_arg_(iseq, str, om, rb_cString, 1);
+    return new_recvinfo_for_arg_(iseq, str, om, OM_TMASK_String, 1);
 }
 
 static int
diff --git a/defs/opt_method.def b/defs/opt_method.def
index e96cc9b..0be4f20 100644
--- a/defs/opt_method.def
+++ b/defs/opt_method.def
@@ -55,3 +55,7 @@ def om(mid, klass)
     "OM_#{mid}__#{klass}"
   end
 end
+
+IS_T_DATA = {
+  "Time" => true
+}
diff --git a/insns.def b/insns.def
index e304338..0487072 100644
--- a/insns.def
+++ b/insns.def
@@ -369,8 +369,8 @@ opt_str_lit
      * 0 - str
      * 1 - optimized method flag (OM_*)
      * optional:
-     * 2 - Class (optimized receiver class) or Symbol (method name)
-     * 3 - stack offset (Fixint), only present if [3] is a Class,
+     * 2 - class mask (optimized receiver classes) or Symbol (method name)
+     * 3 - stack offset (Fixnum), only present if [2] is a class mask,
      *     -1 stack offset means receiver is the frozen string literal itself
      */
     const VALUE *ri = RARRAY_CONST_PTR(recv_info);
@@ -386,12 +386,19 @@ opt_str_lit
 	    int n = FIX2INT(ri[3]);
 	    VALUE recv = n < 0 ? val : TOPN(n);
 
-	    if (SPECIAL_CONST_P(recv) ||
-		    RBASIC_CLASS(recv) != msym_or_class ||
-		    !rb_basic_op_unredefined_p(om)) {
-		/* bad, somebody redefined an optimized method, slow path: */
+	    if (SPECIAL_CONST_P(recv)) {
 		val = rb_str_resurrect(val);
 	    }
+	    else {
+		enum ruby_value_type btype = BUILTIN_TYPE(recv);
+
+		if (!((1U << btype) & FIX2INT(ri[2])) ||
+		    (rb_opt_method_class(btype) != RBASIC_CLASS(recv)) ||
+		    !rb_basic_op_unredefined_p(om))
+		{
+		    val = rb_str_resurrect(val);
+		}
+	    }
 	}
 	else { /* receiver is the string literal itself (e.g. "str".freeze) */
 	    if (!rb_basic_op_unredefined_p(om)) {
diff --git a/template/opt_method.h.tmpl b/template/opt_method.h.tmpl
index 39c4043..a2b2989 100644
--- a/template/opt_method.h.tmpl
+++ b/template/opt_method.h.tmpl
@@ -10,12 +10,14 @@ typedef uint<%= OM_ALIGN %>_t rb_om_bitmap_t;
 enum ruby_optimized_method {
 <%
 opt_masks = {}
+mask_classes = {}
 n = 0
 OPT_METHODS.each do |(mid, *classes)|
   classes.each do |klass|
     if Array === klass
       opt_masks[mid] = klass.dup
       # we will align these in the second loop, below
+      klass.each { |k| mask_classes[k] = true }
       next
     end %>
     <%= om(mid, klass) %> = <%= n += 1 %>,
@@ -60,12 +62,43 @@ opt_masks.each do |mid, c|
       c.map { |k| "(1U << (#{om(mid, k)} % #{OM_ALIGN}))" }.join(sep) + # mask
       ')'
   %>),
+
+    /*
+     * mask for type checking in insns.def, we name this like the OM_*
+     * enum so it is easy to get this name using CPP macros
+     */
+    <%= "OM_TMASK_#{c.join('_')}" %> = (<%=
+      c.map {|k| "(1U << RUBY_T_#{k.upcase})" }.join(sep) %>),
+
 <%
 end # opt_masks.each
+opt_classes.each_key do |k|
+  next if IS_T_DATA[k]
+%>
+    <%= "OM_TMASK_#{k} = (1U << RUBY_T_#{k.upcase})" %>,
+<%
+end # opt_classes.each_key
 %>
     OM_ALIGN_ = <%= OM_ALIGN %>,
     OM_SIZE_ = <%= ((om_last + OM_ALIGN) & OM_ALIGN_MASK) / OM_ALIGN %>,
     OM_GETMASK_ = (1 << OM_ALIGN_) - 1
 };
 
+/* map a raw type to the preferred (optimized) class */
+static inline VALUE
+rb_opt_method_class(enum ruby_value_type type)
+{
+    switch (type) {
+<%
+opt_classes.each_key do |k|
+  next if IS_T_DATA[k]
+%>
+      case RUBY_T_<%= k.upcase %>: return rb_c<%= k %>;
+<%
+end
+%>
+      default: return Qfalse;
+    }
+}
+
 #endif /* RUBY_OPT_METHOD_H */
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2014-10-17  6:20 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-17  6:19 [PATCH 01/13] compile.c: move "literal" optimizations to peephole optimize Eric Wong
2014-10-17  6:19 ` [PATCH 02/13] add generic and flexible opt_str_lit insn Eric Wong
2014-10-17  6:19 ` [PATCH 03/13] compile.c: optimize << and == using putstring_for Eric Wong
2014-10-17  6:19 ` [PATCH 04/13] opt_str_lit: further optimizations and cleanups Eric Wong
2014-10-17  6:19 ` [PATCH 05/13] opt_str_lit: optimize allocations for +, %, * and === calls Eric Wong
2014-10-17  6:19 ` [PATCH 06/13] vm: automatically define optimized method enums Eric Wong
2014-10-17  6:19 ` [PATCH 07/13] fix mismerge Eric Wong
2014-10-17  6:19 ` [PATCH 08/13] optimize string allocations for sub/gsub/tr/tr_s(!) Eric Wong
2014-10-17  6:19 ` [PATCH 09/13] compile.c (opt_str_lit_1): hoist out of iseq_peephole_optimize Eric Wong
2014-10-17  6:19 ` [PATCH 10/13] test/ruby/test_string.rb: cleanup allocation tests Eric Wong
2014-10-17  6:19 ` [PATCH 11/13] test_string: cleanup tests Eric Wong
2014-10-17  6:19 ` [PATCH 12/13] opt_method.inc.tmpl: flatten Eric Wong
2014-10-17  6:19 ` [PATCH 13/13] opt_str_lit: switch to type mask for raw class comparisons Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).