dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH] opt_str_lit: one instruction, many optimizations
@ 2014-10-10  0:46 Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2014-10-10  0:46 UTC (permalink / raw)
  To: spew

This is a squash of the following commits in the "opt_str_lit"
branch of git://bogomips.org/ruby.git

It optimizes away object allocation for string literals in
the following cases:

* "lit" % obj
* str << "lit"
* "lit" + str
* str + "lit"
* "lit" * num
* "lit" === obj
* obj === "lit"
* "lit" == str
* str == "lit"
* "lit" != str
* str != "lit"

Full commit logs are in the git repository above,
and also viewable with $BROWSER at:
  http://bogomips.org/ruby.git/log/?h=opt_str_lit
---
 benchmark/bm_vm2_hash_aref_lit.rb |   6 ++
 benchmark/bm_vm2_hash_aset_lit.rb |   6 ++
 benchmark/bm_vm2_strcat.rb        |   7 ++
 benchmark/bm_vm2_streq1.rb        |   6 ++
 benchmark/bm_vm2_streq2.rb        |   6 ++
 benchmark/bm_vm2_streqq1.rb       |   6 ++
 benchmark/bm_vm2_streqq2.rb       |   6 ++
 benchmark/bm_vm2_strfmt.rb        |   5 +
 benchmark/bm_vm2_strplus1.rb      |   6 ++
 benchmark/bm_vm2_strplus2.rb      |   6 ++
 compile.c                         | 212 +++++++++++++++++++++++++++++++-------
 insns.def                         | 114 ++++++++++----------
 test/-ext-/symbol/test_type.rb    |   1 +
 test/objspace/test_objspace.rb    |   1 +
 test/ruby/envutil.rb              |  10 ++
 test/ruby/test_hash.rb            |   2 +
 test/ruby/test_iseq.rb            |   1 +
 test/ruby/test_string.rb          | 159 ++++++++++++++++++++++++++++
 18 files changed, 464 insertions(+), 96 deletions(-)

diff --git a/benchmark/bm_vm2_hash_aref_lit.rb b/benchmark/bm_vm2_hash_aref_lit.rb
new file mode 100644
index 0000000..a6d4d12
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aref_lit.rb
@@ -0,0 +1,6 @@
+h = { "foo" => nil }
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h["foo"]
+end
diff --git a/benchmark/bm_vm2_hash_aset_lit.rb b/benchmark/bm_vm2_hash_aset_lit.rb
new file mode 100644
index 0000000..58339ec
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aset_lit.rb
@@ -0,0 +1,6 @@
+h = {}
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h["foo"] = nil
+end
diff --git a/benchmark/bm_vm2_strcat.rb b/benchmark/bm_vm2_strcat.rb
new file mode 100644
index 0000000..b25ac6e
--- /dev/null
+++ b/benchmark/bm_vm2_strcat.rb
@@ -0,0 +1,7 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str << "const"
+  str.clear
+end
diff --git a/benchmark/bm_vm2_streq1.rb b/benchmark/bm_vm2_streq1.rb
new file mode 100644
index 0000000..2a4b0f8
--- /dev/null
+++ b/benchmark/bm_vm2_streq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo == "literal"
+end
diff --git a/benchmark/bm_vm2_streq2.rb b/benchmark/bm_vm2_streq2.rb
new file mode 100644
index 0000000..986020d
--- /dev/null
+++ b/benchmark/bm_vm2_streq2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "literal" == foo
+end
diff --git a/benchmark/bm_vm2_streqq1.rb b/benchmark/bm_vm2_streqq1.rb
new file mode 100644
index 0000000..9183466
--- /dev/null
+++ b/benchmark/bm_vm2_streqq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo === "literal"
+end
diff --git a/benchmark/bm_vm2_streqq2.rb b/benchmark/bm_vm2_streqq2.rb
new file mode 100644
index 0000000..f48a9cd
--- /dev/null
+++ b/benchmark/bm_vm2_streqq2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "literal" === foo
+end
diff --git a/benchmark/bm_vm2_strfmt.rb b/benchmark/bm_vm2_strfmt.rb
new file mode 100644
index 0000000..efb88b6
--- /dev/null
+++ b/benchmark/bm_vm2_strfmt.rb
@@ -0,0 +1,5 @@
+i = 0
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "%d" % i
+end
diff --git a/benchmark/bm_vm2_strplus1.rb b/benchmark/bm_vm2_strplus1.rb
new file mode 100644
index 0000000..714efb8
--- /dev/null
+++ b/benchmark/bm_vm2_strplus1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo + "b"
+end
diff --git a/benchmark/bm_vm2_strplus2.rb b/benchmark/bm_vm2_strplus2.rb
new file mode 100644
index 0000000..c7f91ed
--- /dev/null
+++ b/benchmark/bm_vm2_strplus2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "b" + foo
+end
diff --git a/compile.c b/compile.c
index dda22b0..54c7b9e 100644
--- a/compile.c
+++ b/compile.c
@@ -1703,6 +1703,45 @@ get_prev_insn(INSN *iobj)
     return 0;
 }
 
+static VALUE
+new_recvinfo_for_put(rb_iseq_t *iseq, VALUE str,
+		enum ruby_basic_operators bop, int redef_flag)
+{
+    VALUE ri = rb_ary_new_from_args(3, str, INT2FIX(bop), INT2FIX(redef_flag));
+
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
+}
+
+static VALUE
+new_recvinfo_for_call(rb_iseq_t *iseq, VALUE str,
+		enum ruby_basic_operators bop, int redef_flag, ID mid)
+{
+    VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(bop), INT2FIX(redef_flag),
+				    ID2SYM(mid));
+
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
+}
+
+static VALUE
+new_recvinfo_for_arg(rb_iseq_t *iseq, VALUE str,
+		enum ruby_basic_operators bop, int redef_flag,
+		VALUE klass, int recv_off)
+{
+    VALUE ri = rb_ary_new_from_args(5, str, INT2FIX(bop), INT2FIX(redef_flag),
+				    klass, INT2FIX(recv_off));
+
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
+}
+
 static int
 iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcallopt)
 {
@@ -1819,6 +1858,84 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 	    }
 	}
     }
+
+    /* string literal optimizations */
+    if (iobj->insn_id == BIN(putstring)) {
+	INSN *niobj = (INSN *)get_next_insn((INSN *)list);
+
+	if (niobj && niobj->insn_id == BIN(send)) {
+	    rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
+
+	    if (!ci->blockiseq && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) {
+		VALUE ri = Qfalse;
+		VALUE str = iobj->operands[0];
+
+		switch (ci->orig_argc) {
+		  case 0:
+		    /*
+		     * optimize:
+		     * "literal".freeze
+		     * "literal".size
+		     * "literal".length
+		     */
+		    switch (ci->mid) {
+		      case idFreeze:
+			ri = new_recvinfo_for_call(iseq, str, BOP_FREEZE,
+					STRING_REDEFINED_OP_FLAG, ci->mid);
+			REMOVE_ELEM((LINK_ELEMENT *)niobj);
+			break;
+		      case idSize:
+			ri = new_recvinfo_for_put(iseq, str, BOP_SIZE,
+					STRING_REDEFINED_OP_FLAG);
+			break;
+		      case idLength:
+			ri = new_recvinfo_for_put(iseq, str, BOP_LENGTH,
+					STRING_REDEFINED_OP_FLAG);
+			break;
+		    }
+		    break;
+		  case 1:
+		    switch (ci->mid) {
+		      case idAREF:
+			/* optimize allocation: obj["lit"] */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_AREF,
+				    HASH_REDEFINED_OP_FLAG, rb_cHash, 0);
+			break;
+		      case idEq:
+			/* optimize allocation: obj == "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_EQ,
+				    STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			break;
+		      case idNeq:
+			/* optimize allocation: obj != "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_NEQ,
+				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			break;
+		      case idLTLT:
+			/* optimize allocation: obj << "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_LTLT,
+				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			break;
+		      case idPLUS:
+			/* optimize allocation: obj + "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_PLUS,
+				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			break;
+		      case idEqq:
+			/* optimize allocation: obj === "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, BOP_EQQ,
+				STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+			break;
+		    }
+		}
+		if (ri != Qfalse) {
+		    iobj->insn_id = BIN(opt_str_lit);
+		    iobj->operands[0] = ri;
+		}
+	    }
+	}
+    }
+
     return COMPILE_OK;
 }
 
@@ -3096,6 +3213,20 @@ build_postexe_iseq(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *body)
     return Qnil;
 }
 
+static enum ruby_basic_operators
+opt_str_lit_recv_bop(ID mid)
+{
+    switch (mid) {
+      case idEq: return BOP_EQ;
+      case idNeq: return BOP_NEQ;
+      case idPLUS: return BOP_PLUS;
+      case idMULT: return BOP_MULT;
+      case idMOD: return BOP_MOD;
+      case idEqq: return BOP_EQQ;
+    }
+    return BOP_LAST_;
+}
+
 /**
   compile each node
 
@@ -4238,37 +4369,6 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	break;
       }
       case NODE_CALL:
-	/* optimization shortcut
-	 *   "literal".freeze -> opt_str_freeze("literal")
-	 */
-	if (node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
-	    node->nd_mid == idFreeze && node->nd_args == NULL)
-	{
-	    VALUE str = rb_fstring(node->nd_recv->nd_lit);
-	    iseq_add_mark_object(iseq, str);
-	    ADD_INSN1(ret, line, opt_str_freeze, str);
-	    if (poped) {
-		ADD_INSN(ret, line, pop);
-	    }
-	    break;
-	}
-	/* optimization shortcut
-	 *   obj["literal"] -> opt_aref_with(obj, "literal")
-	 */
-	if (node->nd_mid == idAREF && !private_recv_p(node) && node->nd_args &&
-	    nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 1 &&
-	    nd_type(node->nd_args->nd_head) == NODE_STR)
-	{
-	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
-	    node->nd_args->nd_head->nd_lit = str;
-	    COMPILE(ret, "recv", node->nd_recv);
-	    ADD_INSN2(ret, line, opt_aref_with,
-		      new_callinfo(iseq, idAREF, 1, 0, 0), str);
-	    if (poped) {
-		ADD_INSN(ret, line, pop);
-	    }
-	    break;
-	}
       case NODE_FCALL:
       case NODE_VCALL:{		/* VCALL: variable or call */
 	/*
@@ -4352,7 +4452,31 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 #endif
 	/* receiver */
 	if (type == NODE_CALL) {
-	    COMPILE(recv, "recv", node->nd_recv);
+	    enum ruby_basic_operators bop;
+	    /*
+	     * optimize:
+	     *   "yoda" == other -> opt_str_lit("yoda").send(:==, other)
+	     *   "yoda" != other -> opt_str_lit("yoda").send(:!=, other)
+	     *   "str" + other -> opt_str_lit("str").send(:+, other)
+	     *   "str" * other -> opt_str_lit("str").send(:*, other)
+	     *   "fmt" % args -> opt_str_lit("fmt").send(:%, args)
+	     */
+	    if (iseq->compile_data->option->peephole_optimization &&
+		((bop = opt_str_lit_recv_bop(mid)) != BOP_LAST_) &&
+		!private_recv_p(node) &&
+		node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
+		node->nd_args && nd_type(node->nd_args) == NODE_ARRAY &&
+		node->nd_args->nd_alen == 1)
+	    {
+		VALUE yoda = rb_fstring(node->nd_recv->nd_lit);
+		VALUE recv_info = new_recvinfo_for_put(iseq, yoda,
+					    bop, STRING_REDEFINED_OP_FLAG);
+
+		node->nd_recv->nd_lit = yoda;
+		ADD_INSN1(recv, line, opt_str_lit, recv_info);
+	    } else {
+		COMPILE(recv, "recv", node->nd_recv);
+	    }
 	}
 	else if (type == NODE_FCALL || type == NODE_VCALL) {
 	    ADD_CALL_RECEIVER(recv, line);
@@ -5241,23 +5365,33 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	int asgnflag;
 
 	/* optimization shortcut
-	 *   obj["literal"] = value -> opt_aset_with(obj, "literal", value)
+	 *   obj["literal"] = val -> send(obj, :[]=, opt_str_lit("lit"), val)
+	 * TODO: ideally this should be done inside iseq_peephole_optimize,
+	 * but that would require a lot of scanning as the `val' (2nd arg)
+	 * is of variable distance between the :putstring and :send insns
 	 */
-	if (node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
+	if (iseq->compile_data->option->peephole_optimization &&
+	    node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
 	    nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 2 &&
 	    nd_type(node->nd_args->nd_head) == NODE_STR)
 	{
 	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
+	    VALUE recv_info = new_recvinfo_for_arg(iseq, str,
+					    BOP_ASET, HASH_REDEFINED_OP_FLAG,
+					    rb_cHash, 0);
+
 	    node->nd_args->nd_head->nd_lit = str;
-	    iseq_add_mark_object(iseq, str);
+	    if (!poped) {
+		ADD_INSN(ret, line, putnil);
+	    }
 	    COMPILE(ret, "recv", node->nd_recv);
+	    ADD_INSN1(ret, line, opt_str_lit, recv_info);
 	    COMPILE(ret, "value", node->nd_args->nd_next->nd_head);
 	    if (!poped) {
-		ADD_INSN(ret, line, swap);
-		ADD_INSN1(ret, line, topn, INT2FIX(1));
+		ADD_INSN1(ret, line, setn, INT2FIX(3));
 	    }
-	    ADD_INSN2(ret, line, opt_aset_with,
-		      new_callinfo(iseq, idASET, 2, 0, 0), str);
+	    flag = VM_CALL_ARGS_SKIP_SETUP;
+	    ADD_SEND_R(ret, line, node->nd_mid, 2, 0, INT2FIX(flag));
 	    ADD_INSN(ret, line, pop);
 	    break;
 	}
diff --git a/insns.def b/insns.def
index bfa11a9..f6740f8 100644
--- a/insns.def
+++ b/insns.def
@@ -356,6 +356,63 @@ putstring
 
 /**
   @c put
+  @e put string val. string may be created depending on recv_info conditions
+ */
+DEFINE_INSN
+opt_str_lit
+(VALUE recv_info)
+()
+(VALUE val)
+{
+    /*
+     * recv_info:
+     * 0 - str
+     * 1 - basic operator flag (BOP_*)
+     * 2 - redefined flag (*_REDEFINED_OP_FLAG)
+     * optional:
+     * 3 - Class (optimized receiver class) or Symbol (method name)
+     * 4 - stack offset (Fixnum), only present if [3] is a Class,
+     *     -1 stack offset means receiver is the frozen string literal itself
+     */
+    const VALUE *ri = RARRAY_CONST_PTR(recv_info);
+    long len = RARRAY_LEN(recv_info);
+    enum ruby_basic_operators bop = FIX2INT(ri[1]);
+    int redef_flag = FIX2INT(ri[2]);
+
+    val = ri[0]; /* hopefully, this is the only val assignment we need */
+    if (len > 3) {
+	VALUE msym_or_class = ri[3];
+
+	/* check if the receiver is an on-stack object: */
+	if (!SYMBOL_P(msym_or_class)) {
+	    int n = FIX2INT(ri[4]);
+	    VALUE recv = n < 0 ? val : TOPN(n);
+
+	    if (SPECIAL_CONST_P(recv) ||
+		    RBASIC_CLASS(recv) != msym_or_class ||
+		    !BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+		/* bad, somebody redefined an optimized method, slow path: */
+		val = rb_str_resurrect(val);
+	    }
+	}
+	else { /* receiver is the string literal itself (e.g. "str".freeze) */
+	    if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+		/* bad, somebody redefined an optimized method, slow path: */
+		val = rb_str_resurrect(val);
+		val = rb_funcall(val, SYM2ID(msym_or_class), 0);
+	    }
+	}
+    }
+    else { /* string lit is receiver, but there are args */
+	if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+	    /* bad, somebody redefined an optimized method, slow path: */
+	    val = rb_str_resurrect(val);
+	}
+    }
+}
+
+/**
+  @c put
   @e put concatenate strings
   @j スタックトップの文字列を n 個連結し,結果をスタックにプッシュする。
  */
@@ -999,20 +1056,6 @@ send
     CALL_METHOD(ci);
 }
 
-DEFINE_INSN
-opt_str_freeze
-(VALUE str)
-()
-(VALUE val)
-{
-    if (BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG)) {
-	val = str;
-    }
-    else {
-	val = rb_funcall(rb_str_resurrect(str), idFreeze, 0);
-    }
-}
-
 /**
   @c optimize
   @e Invoke method without block, splat
@@ -1903,49 +1946,6 @@ opt_aset
 
 /**
   @c optimize
-  @e recv[str] = set
-  @j 最適化された recv[str] = set。
- */
-DEFINE_INSN
-opt_aset_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv, VALUE val)
-(VALUE val)
-{
-    if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
-	rb_hash_aset(recv, key, val);
-    }
-    else {
-	PUSH(recv);
-	PUSH(rb_str_resurrect(key));
-	PUSH(val);
-	CALL_SIMPLE_METHOD(recv);
-    }
-}
-
-/**
-  @c optimize
-  @e recv[str]
-  @j 最適化された recv[str]。
- */
-DEFINE_INSN
-opt_aref_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv)
-(VALUE val)
-{
-    if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
-	val = rb_hash_aref(recv, key);
-    }
-    else {
-	PUSH(recv);
-	PUSH(rb_str_resurrect(key));
-	CALL_SIMPLE_METHOD(recv);
-    }
-}
-
-/**
-  @c optimize
   @e optimized length
   @j 最適化された recv.length()。
  */
diff --git a/test/-ext-/symbol/test_type.rb b/test/-ext-/symbol/test_type.rb
index f1749f5..5bd79b8 100644
--- a/test/-ext-/symbol/test_type.rb
+++ b/test/-ext-/symbol/test_type.rb
@@ -4,6 +4,7 @@ require "-test-/symbol"
 module Test_Symbol
   class TestType < Test::Unit::TestCase
     def test_id2str_fstring_bug9171
+      require_compile_option(:peephole_optimization)
       fstr = eval("# encoding: us-ascii
         'foobar'.freeze")
       assert_same fstr, Bug::Symbol.id2str(:foobar)
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index 8a5ed34..faacf48 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -195,6 +195,7 @@ class TestObjSpace < Test::Unit::TestCase
   end
 
   def test_dump_flags
+    require_compile_option(:peephole_optimization)
     info = ObjectSpace.dump("foo".freeze)
     assert_match /"wb_protected":true, "old":true, "long_lived":true, "marked":true/, info
     assert_match /"fstring":true/, info
diff --git a/test/ruby/envutil.rb b/test/ruby/envutil.rb
index 81b982c..e844822 100644
--- a/test/ruby/envutil.rb
+++ b/test/ruby/envutil.rb
@@ -477,6 +477,16 @@ eom
         AssertFile
       end
 
+      def require_compile_option(opt)
+        case RubyVM::InstructionSequence.compile_option[opt]
+        when true
+        when false
+          skip(":#{opt} disabled")
+        else
+          raise ArgumentError, "unrecognized compile option: #{opt.inspect}"
+        end
+      end
+
       class << (AssertFile = Struct.new(:failure_message).new)
         include Assertions
         def assert_file_predicate(predicate, *args)
diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index 4431552..bb7e8b5 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -216,6 +216,7 @@ class TestHash < Test::Unit::TestCase
   end
 
   def test_AREF_fstring_key
+    require_compile_option(:peephole_optimization)
     h = {"abc" => 1}
     before = GC.stat(:total_allocated_objects)
     5.times{ h["abc"] }
@@ -230,6 +231,7 @@ class TestHash < Test::Unit::TestCase
   end
 
   def test_NEWHASH_fstring_key
+    require_compile_option(:peephole_optimization)
     a = {"ABC" => :t}
     b = {"ABC" => :t}
     assert_same a.keys[0], b.keys[0]
diff --git a/test/ruby/test_iseq.rb b/test/ruby/test_iseq.rb
index 94a814c..ac1c417 100644
--- a/test/ruby/test_iseq.rb
+++ b/test/ruby/test_iseq.rb
@@ -118,6 +118,7 @@ class TestISeq < Test::Unit::TestCase
   end
 
   def test_label_fstring
+    require_compile_option(:peephole_optimization)
     c = Class.new{ def foobar() end }
 
     a, b = eval("# encoding: us-ascii\n'foobar'.freeze"),
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index d82d2bc..d3357b0 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1908,6 +1908,13 @@ class TestString < Test::Unit::TestCase
     }
   end
 
+  def test_literal_freeze
+    require_compile_option(:peephole_optimization)
+    before = GC.stat(:total_allocated_objects)
+    5.times { "".freeze }
+    assert_equal before, GC.stat(:total_allocated_objects)
+  end
+
   class S2 < String
   end
   def test_str_new4
@@ -2272,6 +2279,33 @@ class TestString < Test::Unit::TestCase
     end;
   end if [0].pack("l!").bytesize < [nil].pack("p").bytesize
   # enable only when string size range is smaller than memory space
+
+  def test_opt_strcat_with
+    assert_separately([], <<-RUBY)
+      class String
+        undef <<
+        def <<(str)
+          "overridden"
+        end
+      end
+      assert_equal("overridden", "" << "foo")
+      foo = "foo"
+      assert_equal("overridden", foo << "bar")
+    RUBY
+
+    if @cls == String
+      nr = 10
+      recv = ""
+      before = GC.stat(:total_allocated_objects)
+      nr.times { recv << "constant" }
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal "constant" * nr, recv
+
+      before = GC.stat(:total_allocated_objects)
+      nr.times { "recv" << "constant" }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+    end
+  end
 end
 
 class TestString2 < TestString
@@ -2279,4 +2313,129 @@ class TestString2 < TestString
     super
     @cls = S2
   end
+
+  def test_opt_str_lit
+    assert_separately([], <<-RUBY)
+      class String
+        undef ==
+        def ==(str)
+          :TROO
+        end
+      end
+      foo = "foo"
+      assert_equal(:TROO, (foo == "foo"), 'string == "peephole 2nd pass"')
+      assert_equal(:TROO, ("foo" == foo), '"yoda 1st pass" == string')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef !=
+        def !=(str)
+          :NOT
+        end
+      end
+      foo = ""
+      assert_equal(:NOT, ("foo" != foo), '"yoda 1st pass" != string')
+      assert_equal(:NOT, (foo != "foo"), 'string != "peephole 2nd pass"')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef size
+        undef length
+        def size
+          42
+        end
+        def length
+          42
+        end
+      end
+      assert_equal(42, "".size, 'lit string size')
+      assert_equal(42, "".length, 'lit string size')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef +
+        def +(other)
+          :plus
+        end
+      end
+      foo = "a"
+      assert_equal(:plus, "" + foo, 'lit plus')
+      assert_equal(:plus, foo + "", 'plus lit')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef *
+        def *(other)
+          :mult
+        end
+      end
+      assert_equal(:mult, "x" * 3, 'lit mult')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef ===
+        def ===(other)
+          other
+        end
+      end
+      str = "y"
+      assert_equal(false, "x" === false, 'lit threequal')
+      assert_equal("x", str === "x", 'threequal lit')
+    RUBY
+
+    if @cls == String
+      nr = 10
+
+      recv = "something"
+      res = []
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv == "constant") } # opt_streq1
+      nr.times { res << ("constant" == recv) } # opt_streq2
+      nr.times { res << ("something " != recv) } # 1st pass peephole
+      nr.times { res << ("constant" == recv) } # opt_streq2
+      nr.times { res << ("constant" === recv) } # opt_streqq2
+      nr.times { res << (recv != "something") }  # 2nd pass peephole
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal [ false ], res.uniq!
+
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv == "something") } # opt_streq1
+      nr.times { res << ("something" == recv) } # opt_streq2
+      nr.times { res << ("something" === recv) } # opt_streqq2
+      nr.times { res << (recv === "something") } # opt_streqq1
+      nr.times { res << ("constant" != recv) } # 1st pass peephole
+      nr.times { res << (recv != "constant") } # 2nd pass peephole
+      nr.times { res << ("a" != "b") } # 1st pass peephole
+      nr.times { res << ("a" == "a") } # 1st pass peephole
+      nr.times { res << ("".size == 0) } # 2nd pass peephole
+      nr.times { res << ("".length == 0) } # 2nd pass peephole
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal [ true ], res.uniq!
+
+      # :+ optimizations
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << ("foo" + recv) }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_equal [ "foosomething" ], res.uniq!
+
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv + "foo") }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_equal [ "somethingfoo" ], res.uniq!
+
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << ('a' * 3) }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_equal [ "aaa" ], res.uniq!
+    end
+  end
 end
-- 
EW

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH] opt_str_lit: one instruction, many optimizations
@ 2014-10-14  2:42 Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2014-10-14  2:42 UTC (permalink / raw)
  To: spew

This is a squash of the following commits in the "opt_str_lit-v2"
branch of git://bogomips.org/ruby.git

It optimizes away object allocation for string literals in
the following cases:

* "lit" % obj
* str << "lit"
* "lit" + str
* str + "lit"
* "lit" * num
* "lit" === obj
* obj === "lit"
* "lit" == str
* str == "lit"
* "lit" != str
* str != "lit"
* str.(gsub,sub,tr,tr_s)(!)(any, "lit_b")

Full commit logs are in the git repository above,
and also viewable with $BROWSER at:
  http://bogomips.org/ruby.git/log/?h=opt_str_lit-v2
---
 benchmark/bm_vm2_gsub_bang_lit.rb |   6 +
 benchmark/bm_vm2_gsub_bang_re.rb  |   6 +
 benchmark/bm_vm2_gsub_re.rb       |   6 +
 benchmark/bm_vm2_hash_aref_lit.rb |   6 +
 benchmark/bm_vm2_hash_aset_lit.rb |   6 +
 benchmark/bm_vm2_strcat.rb        |   7 ++
 benchmark/bm_vm2_streq1.rb        |   6 +
 benchmark/bm_vm2_streq2.rb        |   6 +
 benchmark/bm_vm2_streqq1.rb       |   6 +
 benchmark/bm_vm2_streqq2.rb       |   6 +
 benchmark/bm_vm2_strfmt.rb        |   5 +
 benchmark/bm_vm2_strplus1.rb      |   6 +
 benchmark/bm_vm2_strplus2.rb      |   6 +
 benchmark/bm_vm2_tr_bang.rb       |   7 ++
 common.mk                         |  18 ++-
 compile.c                         | 255 ++++++++++++++++++++++++++++++++------
 defs/id.def                       |   9 ++
 defs/opt_method.def               |  57 +++++++++
 insns.def                         | 219 +++++++++++++++-----------------
 template/opt_method.h.tmpl        |  71 +++++++++++
 template/opt_method.inc.tmpl      |  49 ++++++++
 test/-ext-/symbol/test_type.rb    |   1 +
 test/objspace/test_objspace.rb    |   1 +
 test/ruby/envutil.rb              |  10 ++
 test/ruby/test_hash.rb            |   2 +
 test/ruby/test_iseq.rb            |   1 +
 test/ruby/test_string.rb          | 193 +++++++++++++++++++++++++++++
 vm.c                              |  67 ++--------
 vm_core.h                         |  44 +------
 vm_insnhelper.c                   |   8 +-
 vm_insnhelper.h                   |  25 +++-
 31 files changed, 853 insertions(+), 262 deletions(-)

diff --git a/benchmark/bm_vm2_gsub_bang_lit.rb b/benchmark/bm_vm2_gsub_bang_lit.rb
new file mode 100644
index 0000000..9251fb1
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_bang_lit.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.gsub!("nomatch", "")
+end
diff --git a/benchmark/bm_vm2_gsub_bang_re.rb b/benchmark/bm_vm2_gsub_bang_re.rb
new file mode 100644
index 0000000..e5fc9ea
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_bang_re.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.gsub!(/a/, "")
+end
diff --git a/benchmark/bm_vm2_gsub_re.rb b/benchmark/bm_vm2_gsub_re.rb
new file mode 100644
index 0000000..606f247
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_re.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.gsub(/a/, "")
+end
diff --git a/benchmark/bm_vm2_hash_aref_lit.rb b/benchmark/bm_vm2_hash_aref_lit.rb
new file mode 100644
index 0000000..a6d4d12
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aref_lit.rb
@@ -0,0 +1,6 @@
+h = { "foo" => nil }
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h["foo"]
+end
diff --git a/benchmark/bm_vm2_hash_aset_lit.rb b/benchmark/bm_vm2_hash_aset_lit.rb
new file mode 100644
index 0000000..58339ec
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aset_lit.rb
@@ -0,0 +1,6 @@
+h = {}
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h["foo"] = nil
+end
diff --git a/benchmark/bm_vm2_strcat.rb b/benchmark/bm_vm2_strcat.rb
new file mode 100644
index 0000000..b25ac6e
--- /dev/null
+++ b/benchmark/bm_vm2_strcat.rb
@@ -0,0 +1,7 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str << "const"
+  str.clear
+end
diff --git a/benchmark/bm_vm2_streq1.rb b/benchmark/bm_vm2_streq1.rb
new file mode 100644
index 0000000..2a4b0f8
--- /dev/null
+++ b/benchmark/bm_vm2_streq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo == "literal"
+end
diff --git a/benchmark/bm_vm2_streq2.rb b/benchmark/bm_vm2_streq2.rb
new file mode 100644
index 0000000..986020d
--- /dev/null
+++ b/benchmark/bm_vm2_streq2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "literal" == foo
+end
diff --git a/benchmark/bm_vm2_streqq1.rb b/benchmark/bm_vm2_streqq1.rb
new file mode 100644
index 0000000..9183466
--- /dev/null
+++ b/benchmark/bm_vm2_streqq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo === "literal"
+end
diff --git a/benchmark/bm_vm2_streqq2.rb b/benchmark/bm_vm2_streqq2.rb
new file mode 100644
index 0000000..f48a9cd
--- /dev/null
+++ b/benchmark/bm_vm2_streqq2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "literal" === foo
+end
diff --git a/benchmark/bm_vm2_strfmt.rb b/benchmark/bm_vm2_strfmt.rb
new file mode 100644
index 0000000..efb88b6
--- /dev/null
+++ b/benchmark/bm_vm2_strfmt.rb
@@ -0,0 +1,5 @@
+i = 0
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "%d" % i
+end
diff --git a/benchmark/bm_vm2_strplus1.rb b/benchmark/bm_vm2_strplus1.rb
new file mode 100644
index 0000000..714efb8
--- /dev/null
+++ b/benchmark/bm_vm2_strplus1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo + "b"
+end
diff --git a/benchmark/bm_vm2_strplus2.rb b/benchmark/bm_vm2_strplus2.rb
new file mode 100644
index 0000000..c7f91ed
--- /dev/null
+++ b/benchmark/bm_vm2_strplus2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "b" + foo
+end
diff --git a/benchmark/bm_vm2_tr_bang.rb b/benchmark/bm_vm2_tr_bang.rb
new file mode 100644
index 0000000..8065a65
--- /dev/null
+++ b/benchmark/bm_vm2_tr_bang.rb
@@ -0,0 +1,7 @@
+i = 0
+str = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.tr!("a", "A")
+  str.tr!("A", "a")
+end
diff --git a/common.mk b/common.mk
index ce01aca..0a533a5 100644
--- a/common.mk
+++ b/common.mk
@@ -639,7 +639,7 @@ PROBES_H_INCLUDES  = {$(VPATH)}probes.h
 VM_CORE_H_INCLUDES = {$(VPATH)}vm_core.h {$(VPATH)}thread_$(THREAD_MODEL).h \
 		     {$(VPATH)}node.h {$(VPATH)}method.h {$(VPATH)}ruby_atomic.h \
 	             {$(VPATH)}vm_debug.h {$(VPATH)}id.h {$(VPATH)}thread_native.h \
-	             $(CCAN_LIST_INCLUDES)
+	             $(CCAN_LIST_INCLUDES) {$(VPATH)}opt_method.h
 
 ###
 
@@ -826,7 +826,7 @@ vm.$(OBJEXT): {$(VPATH)}vm.c {$(VPATH)}gc.h {$(VPATH)}iseq.h \
   $(VM_CORE_H_INCLUDES) {$(VPATH)}vm_method.c {$(VPATH)}vm_eval.c \
   {$(VPATH)}vm_insnhelper.c {$(VPATH)}vm_insnhelper.h {$(VPATH)}vm_exec.c \
   {$(VPATH)}vm_exec.h {$(VPATH)}insns.def {$(VPATH)}vmtc.inc \
-  {$(VPATH)}vm.inc {$(VPATH)}insns.inc \
+  {$(VPATH)}vm.inc {$(VPATH)}insns.inc {$(VPATH)}opt_method.inc \
   {$(VPATH)}internal.h {$(VPATH)}vm.h {$(VPATH)}constant.h \
   $(PROBES_H_INCLUDES) {$(VPATH)}probes_helper.h {$(VPATH)}vm_opts.h
 vm_dump.$(OBJEXT): {$(VPATH)}vm_dump.c $(RUBY_H_INCLUDES) \
@@ -931,6 +931,20 @@ incs: $(INSNS) {$(VPATH)}node_name.inc {$(VPATH)}encdb.h {$(VPATH)}transdb.h {$(
 
 insns: $(INSNS)
 
+opt_method.h: $(srcdir)/tool/generic_erb.rb \
+		$(srcdir)/template/opt_method.h.tmpl \
+		$(srcdir)/defs/opt_method.def
+	$(ECHO) generating $@
+	$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
+		$(srcdir)/template/opt_method.h.tmpl
+
+opt_method.inc: $(srcdir)/tool/generic_erb.rb \
+		$(srcdir)/template/opt_method.inc.tmpl \
+		$(srcdir)/defs/opt_method.def
+	$(ECHO) generating $@
+	$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
+		$(srcdir)/template/opt_method.inc.tmpl
+
 id.h: $(srcdir)/tool/generic_erb.rb $(srcdir)/template/id.h.tmpl $(srcdir)/defs/id.def
 	$(ECHO) generating $@
 	$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
diff --git a/compile.c b/compile.c
index 8df7acf..205ff6a 100644
--- a/compile.c
+++ b/compile.c
@@ -1703,6 +1703,96 @@ get_prev_insn(INSN *iobj)
     return 0;
 }
 
+#define new_recvinfo_for_put(iseq,str,mid,klass) \
+    new_recvinfo_for_put_(iseq,str,OM_##mid##__##klass)
+static VALUE
+new_recvinfo_for_put_(rb_iseq_t *iseq, VALUE str, enum ruby_optimized_method om)
+{
+    VALUE ri = rb_ary_new_from_args(2, str, INT2FIX(om));
+    /* recv_info is [str, om]; callers use it when str is the receiver */
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
+}
+
+#define new_recvinfo_for_call(iseq,str,mid,klass) \
+    new_recvinfo_for_call_((iseq),(str),OM_##mid##__##klass,(mid))
+static VALUE
+new_recvinfo_for_call_(rb_iseq_t *iseq, VALUE str,
+		    enum ruby_optimized_method om, ID mid)
+{
+    VALUE ri = rb_ary_new_from_args(3, str, INT2FIX(om), ID2SYM(mid));
+    /* recv_info is [str, om, mid as Symbol]; str is the receiver of mid */
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
+}
+
+#define new_recvinfo_for_arg(iseq,str,mid,klass,off) \
+    new_recvinfo_for_arg_((iseq),(str),OM_##mid##__##klass,(rb_c##klass),(off))
+static VALUE
+new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str,
+		enum ruby_optimized_method om, VALUE klass, int recv_off)
+{
+    VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(om),
+				    klass, INT2FIX(recv_off));
+    /* recv_info is [str, om, klass, recv_off]; str is a call argument */
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
+}
+
+/*
+ * optimize common 2-arg calls whose last arg is a string literal:
+ *   foo.sub(/../, "to")
+ *   foo.sub!(/../, "to")
+ *   foo.gsub(/../, "to")
+ *   foo.gsub!(/../, "to")
+ *   foo.tr(from, "to")
+ *   foo.tr!(from, "to")
+ *   foo.tr_s(from, "to")
+ *   foo.tr_s!(from, "to")
+ */
+static VALUE
+opt_str_lit_2(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
+{
+    INSN *piobj;
+    enum ruby_optimized_method om = OM_LAST_;
+
+    switch (ci->mid) {
+#define C(mid) case mid: om = OM_##mid##__String; break
+      C(idSub);
+      C(idSub_bang);
+      C(idGsub);
+      C(idGsub_bang);
+      C(idTr);
+      C(idTr_bang);
+      C(idTr_s);
+      C(idTr_s_bang);
+#undef C
+      default: return Qfalse; /* mid is not one of the optimized methods */
+    }
+
+    /*
+     * previous arg may be a string literal, too:
+     *   foo.gsub!("from", "to")
+     *   foo.tr!("from", "to")
+     *   ..
+     */
+    piobj = (INSN *)get_prev_insn(list);
+    if (piobj && piobj->insn_id == BIN(putstring)) {
+	VALUE pstr = piobj->operands[0];
+	VALUE pri = new_recvinfo_for_arg_(iseq, pstr, om, rb_cString, 0);
+	piobj->operands[0] = pri;
+	piobj->insn_id = BIN(opt_str_lit);
+    }
+    /* recv_off 1: the first argument is between the receiver and str */
+    return new_recvinfo_for_arg_(iseq, str, om, rb_cString, 1);
+}
+
 static int
 iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcallopt)
 {
@@ -1819,6 +1909,79 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 	    }
 	}
     }
+
+    /* string literal optimizations */
+    if (iobj->insn_id == BIN(putstring)) {
+	INSN *niobj = (INSN *)get_next_insn((INSN *)list);
+
+	if (niobj && niobj->insn_id == BIN(send)) {
+	    rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
+
+	    if (!ci->blockiseq && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) {
+		VALUE ri = Qfalse;
+		VALUE str = iobj->operands[0];
+
+		switch (ci->orig_argc) {
+		  case 0:
+		    /*
+		     * optimize:
+		     * "literal".freeze
+		     * "literal".size
+		     * "literal".length
+		     */
+		    switch (ci->mid) {
+		      case idFreeze:
+			ri = new_recvinfo_for_call(iseq, str, idFreeze, String);
+			REMOVE_ELEM((LINK_ELEMENT *)niobj);
+			break;
+		      case idSize:
+			ri = new_recvinfo_for_put(iseq, str, idSize, String);
+			break;
+		      case idLength:
+			ri = new_recvinfo_for_put(iseq, str, idLength, String);
+			break;
+		    }
+		    break;
+		  case 1:
+		    switch (ci->mid) {
+		      case idAREF:
+			/* optimize allocation: obj["lit"] */
+			ri = new_recvinfo_for_arg(iseq, str, idAREF, Hash, 0);
+			break;
+		      case idEq:
+			/* optimize allocation: obj == "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, idEq, String, 0);
+			break;
+		      case idNeq:
+			/* optimize allocation: obj != "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, idNeq, String, 0);
+			break;
+		      case idLTLT:
+			/* optimize allocation: obj << "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, idLTLT, String, 0);
+			break;
+		      case idPLUS:
+			/* optimize allocation: obj + "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, idPLUS, String, 0);
+			break;
+		      case idEqq:
+			/* optimize allocation: obj === "lit" */
+			ri = new_recvinfo_for_arg(iseq, str, idEqq, String, 0);
+			break;
+		    }
+		    break;
+		  case 2:
+		    ri = opt_str_lit_2(iseq, str, ci, (INSN *)list);
+		    break;
+		}
+		if (ri != Qfalse) {
+		    iobj->insn_id = BIN(opt_str_lit);
+		    iobj->operands[0] = ri;
+		}
+	    }
+	}
+    }
+
     return COMPILE_OK;
 }
 
@@ -3096,6 +3259,20 @@ build_postexe_iseq(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *body)
     return Qnil;
 }
 
+static enum ruby_optimized_method
+opt_str_lit_recv_om(ID mid)
+{
+    switch (mid) {
+      case idEq: return OM_idEq__String;
+      case idNeq: return OM_idNeq__String;
+      case idPLUS: return OM_idPLUS__String;
+      case idMULT: return OM_idMULT__String;
+      case idMOD: return OM_idMOD__String;
+      case idEqq: return OM_idEqq__String;
+    }
+    return OM_LAST_; /* mid is not optimized for a literal receiver */
+}
+
 /**
   compile each node
 
@@ -4238,37 +4415,6 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	break;
       }
       case NODE_CALL:
-	/* optimization shortcut
-	 *   "literal".freeze -> opt_str_freeze("literal")
-	 */
-	if (node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
-	    node->nd_mid == idFreeze && node->nd_args == NULL)
-	{
-	    VALUE str = rb_fstring(node->nd_recv->nd_lit);
-	    iseq_add_mark_object(iseq, str);
-	    ADD_INSN1(ret, line, opt_str_freeze, str);
-	    if (poped) {
-		ADD_INSN(ret, line, pop);
-	    }
-	    break;
-	}
-	/* optimization shortcut
-	 *   obj["literal"] -> opt_aref_with(obj, "literal")
-	 */
-	if (node->nd_mid == idAREF && !private_recv_p(node) && node->nd_args &&
-	    nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 1 &&
-	    nd_type(node->nd_args->nd_head) == NODE_STR)
-	{
-	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
-	    node->nd_args->nd_head->nd_lit = str;
-	    COMPILE(ret, "recv", node->nd_recv);
-	    ADD_INSN2(ret, line, opt_aref_with,
-		      new_callinfo(iseq, idAREF, 1, 0, 0), str);
-	    if (poped) {
-		ADD_INSN(ret, line, pop);
-	    }
-	    break;
-	}
       case NODE_FCALL:
       case NODE_VCALL:{		/* VCALL: variable or call */
 	/*
@@ -4352,7 +4498,30 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 #endif
 	/* receiver */
 	if (type == NODE_CALL) {
-	    COMPILE(recv, "recv", node->nd_recv);
+	    enum ruby_optimized_method om;
+	    /*
+	     * optimize:
+	     *   "yoda" == other -> opt_str_lit("yoda").send(:==, other)
+	     *   "yoda" != other -> opt_str_lit("yoda").send(:!=, other)
+	     *   "str" + other -> opt_str_lit("str").send(:+, other)
+	     *   "str" * other -> opt_str_lit("str").send(:*, other)
+	     *   "fmt" % args -> opt_str_lit("fmt").send(:%, args)
+	     */
+	    if (iseq->compile_data->option->peephole_optimization &&
+		((om = opt_str_lit_recv_om(mid)) != OM_LAST_) &&
+		!private_recv_p(node) &&
+		node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
+		node->nd_args && nd_type(node->nd_args) == NODE_ARRAY &&
+		node->nd_args->nd_alen == 1)
+	    {
+		VALUE yoda = rb_fstring(node->nd_recv->nd_lit);
+		VALUE recv_info = new_recvinfo_for_put_(iseq, yoda, om);
+
+		node->nd_recv->nd_lit = yoda;
+		ADD_INSN1(recv, line, opt_str_lit, recv_info);
+	    } else {
+		COMPILE(recv, "recv", node->nd_recv);
+	    }
 	}
 	else if (type == NODE_FCALL || type == NODE_VCALL) {
 	    ADD_CALL_RECEIVER(recv, line);
@@ -5241,23 +5410,31 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	int asgnflag;
 
 	/* optimization shortcut
-	 *   obj["literal"] = value -> opt_aset_with(obj, "literal", value)
+	 *   obj["literal"] = val -> send(obj, :[]=, opt_str_lit("lit"), val)
+	 * TODO: ideally this should be done inside iseq_peephole_optimize,
+	 * but that would require a lot of scanning as the `val' (2nd arg)
+	 * is of variable distance between the :putstring and :send insns
 	 */
-	if (node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
+	if (iseq->compile_data->option->peephole_optimization &&
+	    node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
 	    nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 2 &&
 	    nd_type(node->nd_args->nd_head) == NODE_STR)
 	{
 	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
+	    VALUE recv_info = new_recvinfo_for_arg(iseq, str, idASET, Hash, 0);
+
 	    node->nd_args->nd_head->nd_lit = str;
-	    iseq_add_mark_object(iseq, str);
+	    if (!poped) {
+		ADD_INSN(ret, line, putnil);
+	    }
 	    COMPILE(ret, "recv", node->nd_recv);
+	    ADD_INSN1(ret, line, opt_str_lit, recv_info);
 	    COMPILE(ret, "value", node->nd_args->nd_next->nd_head);
 	    if (!poped) {
-		ADD_INSN(ret, line, swap);
-		ADD_INSN1(ret, line, topn, INT2FIX(1));
+		ADD_INSN1(ret, line, setn, INT2FIX(3));
 	    }
-	    ADD_INSN2(ret, line, opt_aset_with,
-		      new_callinfo(iseq, idASET, 2, 0, 0), str);
+	    flag = VM_CALL_ARGS_SKIP_SETUP;
+	    ADD_SEND_R(ret, line, node->nd_mid, 2, 0, INT2FIX(flag));
 	    ADD_INSN(ret, line, pop);
 	    break;
 	}
diff --git a/defs/id.def b/defs/id.def
index f7fffbd..21aff93 100644
--- a/defs/id.def
+++ b/defs/id.def
@@ -57,6 +57,14 @@ firstline, predefined = __LINE__+1, %[\
   core#hash_merge_ary
   core#hash_merge_ptr
   core#hash_merge_kwd
+  gsub
+  gsub!
+  sub
+  sub!
+  tr
+  tr!
+  tr_s
+  tr_s!
 ]
 
 class KeywordError < RuntimeError
@@ -83,6 +91,7 @@ predefined.split(/^/).each_with_index do |line, num|
     token = "_#{token.gsub(/\W+/, '_')}"
   else
     token = token.sub(/\?/, 'P').sub(/\A[a-z]/) {$&.upcase}
+    token.sub!(/!\z/, "_bang")
     token.sub!(/\A\$/, "_G_")
     token.sub!(/\A@@/, "_C_")
     token.sub!(/\A@/, "_I_")
diff --git a/defs/opt_method.def b/defs/opt_method.def
new file mode 100644
index 0000000..e96cc9b
--- /dev/null
+++ b/defs/opt_method.def
@@ -0,0 +1,57 @@
+# byte align the bitmap for now, maybe some arches do better with long or int
+# we may also use a larger size (in the unlikely case) we need more than
+# 7 optimized classes per mid.   Currently this caps us to 256 optimized
+# (mid, klass) combinations (tested with OM_SHIFT=4, giving us 64K)
+OM_SHIFT = 3
+OM_ALIGN = 1 << OM_SHIFT
+OM_ALIGN_MASK = ~(OM_ALIGN - 1)
+OPT_METHODS = [
+  %w(idPLUS Fixnum Float String Array),
+  %w(idMINUS Fixnum Float),
+  %w(idMULT Fixnum Float String),
+  %w(idDIV Fixnum Float),
+  %w(idMOD Fixnum Float String),
+  %w(idEq Fixnum Float String),
+  %w(idNeq Fixnum Float String),
+  # id, mask classes
+  [ 'idEqq', %w(Bignum Fixnum Float Symbol), *%w(String) ],
+  %w(idLT Fixnum Float),
+  %w(idLE Fixnum Float),
+  %w(idGT Fixnum Float),
+  %w(idGE Fixnum Float),
+  %w(idLTLT String Array),
+  %w(idAREF Array Hash),
+  %w(idASET Array Hash),
+  %w(idLength Array String Hash),
+  %w(idSize Array String Hash),
+  %w(idEmptyP Array String Hash),
+  %w(idSucc Fixnum String Time),
+  %w(idEqTilde Regexp String),
+  %w(idFreeze String),
+  %w(idGsub String),
+  %w(idGsub_bang String),
+  %w(idSub String),
+  %w(idSub_bang String),
+  %w(idTr String),
+  %w(idTr_bang String),
+  %w(idTr_s String),
+  %w(idTr_s_bang String),
+]
+
+# for checking optimized classes,
+# speeds up method definitions of non-core classes
+def opt_classes
+  rv = {}
+  OPT_METHODS.each do |(_, *classes)|
+    classes.flatten.each { |c| rv[c] = true }
+  end
+  rv
+end
+
+def om(mid, klass) # enum name; an Array klass names a multi-class mask
+  if Array === klass
+    "OM_#{mid}__#{klass.join('_')}"
+  else
+    "OM_#{mid}__#{klass}"
+  end
+end
diff --git a/insns.def b/insns.def
index bfa11a9..e304338 100644
--- a/insns.def
+++ b/insns.def
@@ -356,6 +356,61 @@ putstring
 
 /**
   @c put
+  @e put string val. string may be created depending on recv_info conditions
+ */
+DEFINE_INSN
+opt_str_lit
+(VALUE recv_info)
+()
+(VALUE val)
+{
+    /*
+     * recv_info:
+     * 0 - str
+     * 1 - optimized method flag (OM_*)
+     * optional:
+     * 2 - Class (optimized receiver class) or Symbol (method name)
+     * 3 - stack offset (Fixnum), only present if [2] is a Class,
+     *     a negative offset means receiver is the string literal itself
+     */
+    const VALUE *ri = RARRAY_CONST_PTR(recv_info);
+    long len = RARRAY_LEN(recv_info);
+    enum ruby_optimized_method om = FIX2INT(ri[1]);
+
+    val = ri[0]; /* hopefully, this is the only val assignment we need */
+    if (len > 2) {
+	VALUE msym_or_class = ri[2];
+
+	/* check if the receiver is an on-stack object: */
+	if (!SYMBOL_P(msym_or_class)) {
+	    int n = FIX2INT(ri[3]);
+	    VALUE recv = n < 0 ? val : TOPN(n);
+
+	    if (SPECIAL_CONST_P(recv) ||
+		    RBASIC_CLASS(recv) != msym_or_class ||
+		    !rb_basic_op_unredefined_p(om)) {
+		/* unexpected receiver class or redefined method, slow path: */
+		val = rb_str_resurrect(val);
+	    }
+	}
+	else { /* receiver is the string literal itself (e.g. "str".freeze) */
+	    if (!rb_basic_op_unredefined_p(om)) {
+		/* bad, somebody redefined an optimized method, slow path: */
+		val = rb_str_resurrect(val);
+		val = rb_funcall(val, SYM2ID(msym_or_class), 0);
+	    }
+	}
+    }
+    else { /* string lit is the receiver of a following send */
+	if (!rb_basic_op_unredefined_p(om)) {
+	    /* bad, somebody redefined an optimized method, slow path: */
+	    val = rb_str_resurrect(val);
+	}
+    }
+}
+
+/**
+  @c put
   @e put concatenate strings
   @j スタックトップの文字列を n 個連結し,結果をスタックにプッシュする。
  */
@@ -999,20 +1054,6 @@ send
     CALL_METHOD(ci);
 }
 
-DEFINE_INSN
-opt_str_freeze
-(VALUE str)
-()
-(VALUE val)
-{
-    if (BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG)) {
-	val = str;
-    }
-    else {
-	val = rb_funcall(rb_str_resurrect(str), idFreeze, 0);
-    }
-}
-
 /**
   @c optimize
   @e Invoke method without block, splat
@@ -1285,11 +1326,7 @@ opt_case_dispatch
       case T_FIXNUM:
       case T_BIGNUM:
       case T_STRING:
-	if (BASIC_OP_UNREDEFINED_P(BOP_EQQ,
-				   SYMBOL_REDEFINED_OP_FLAG |
-				   FIXNUM_REDEFINED_OP_FLAG |
-				   BIGNUM_REDEFINED_OP_FLAG |
-				   STRING_REDEFINED_OP_FLAG)) {
+	if (rb_basic_mask_unredefined_p(OM_idEqq__Bignum_Fixnum_Float_Symbol)) {
 	    st_data_t val;
 	    if (st_lookup(RHASH_TBL_RAW(hash), key, &val)) {
 		JUMP(FIX2INT((VALUE)val));
@@ -1317,8 +1354,7 @@ opt_plus
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_PLUS,FIXNUM_REDEFINED_OP_FLAG)) {
+    if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idPLUS, Fixnum)) {
 	/* fixnum + fixnum */
 #ifndef LONG_LONG_VALUE
 	val = (recv + (obj & (~1)));
@@ -1341,20 +1377,20 @@ opt_plus
 #endif
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idPLUS, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
-	    BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idPLUS, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
 	}
 	else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString &&
-		 BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idPLUS, String)) {
 	    val = rb_str_plus(recv, obj);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idPLUS, Array)) {
 	    val = rb_ary_plus(recv, obj);
 	}
 	else {
@@ -1381,7 +1417,7 @@ opt_minus
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_MINUS, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idMINUS, Fixnum)) {
 	long a, b, c;
 
 	a = FIX2LONG(recv);
@@ -1396,12 +1432,12 @@ opt_minus
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idMINUS, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idMINUS, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1429,7 +1465,7 @@ opt_mult
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_MULT, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idMULT, Fixnum)) {
 	long a, b;
 
 	a = FIX2LONG(recv);
@@ -1446,13 +1482,12 @@ opt_mult
             }
 	}
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMULT, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idMULT, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1478,8 +1513,7 @@ opt_div
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_DIV, FIXNUM_REDEFINED_OP_FLAG)) {
+    if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idDIV, Fixnum)) {
 	long x, y, div;
 
 	x = FIX2LONG(recv);
@@ -1509,13 +1543,12 @@ opt_div
 	}
 	val = LONG2NUM(div);
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idDIV, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idDIV, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1541,8 +1574,7 @@ opt_mod
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_MOD, FIXNUM_REDEFINED_OP_FLAG )) {
+    if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMOD, Fixnum )) {
 	long x, y;
 
 	x = FIX2LONG(recv);
@@ -1576,13 +1608,12 @@ opt_mod
 	    val = LONG2FIX(mod);
 	}
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMOD, Float)) {
 	val = DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
-	    BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idMOD, Float)) {
 	    val = DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)));
 	}
 	else {
@@ -1661,7 +1692,7 @@ opt_lt
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_LT, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idLT, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a < b) {
@@ -1672,13 +1703,13 @@ opt_lt
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idLT, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) < RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idLT, Float)) {
 	    val = double_cmp_lt(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1705,7 +1736,7 @@ opt_le
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_LE, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idLE, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a <= b) {
@@ -1716,7 +1747,7 @@ opt_le
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_LE, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idLE, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) <= RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
@@ -1740,7 +1771,7 @@ opt_gt
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_GT, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idGT, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a > b) {
@@ -1751,13 +1782,13 @@ opt_gt
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idGT, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) > RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idGT, Float)) {
 	    val = double_cmp_gt(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1784,7 +1815,7 @@ opt_ge
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_GE, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idGE, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a >= b) {
@@ -1794,8 +1825,7 @@ opt_ge
 	    val = Qfalse;
 	}
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_GE, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idGE, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) >= RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
@@ -1819,11 +1849,11 @@ opt_ltlt
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_LTLT, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idLTLT, String)) {
 	    val = rb_str_concat(recv, obj);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_LTLT, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idLTLT, Array)) {
 	    val = rb_ary_push(recv, obj);
 	}
 	else {
@@ -1850,10 +1880,10 @@ opt_aref
 (VALUE val)
 {
     if (!SPECIAL_CONST_P(recv)) {
-	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(BOP_AREF, ARRAY_REDEFINED_OP_FLAG) && FIXNUM_P(obj)) {
+	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(idAREF, Array) && FIXNUM_P(obj)) {
 	    val = rb_ary_entry(recv, FIX2LONG(obj));
 	}
-	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
+	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(idAREF, Hash)) {
 	    val = rb_hash_aref(recv, obj);
 	}
 	else {
@@ -1880,11 +1910,11 @@ opt_aset
 (VALUE val)
 {
     if (!SPECIAL_CONST_P(recv)) {
-	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(BOP_ASET, ARRAY_REDEFINED_OP_FLAG) && FIXNUM_P(obj)) {
+	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(idASET, Array) && FIXNUM_P(obj)) {
 	    rb_ary_store(recv, FIX2LONG(obj), set);
 	    val = set;
 	}
-	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
+	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(idASET, Hash)) {
 	    rb_hash_aset(recv, obj, set);
 	    val = set;
 	}
@@ -1903,49 +1933,6 @@ opt_aset
 
 /**
   @c optimize
-  @e recv[str] = set
-  @j 最適化された recv[str] = set。
- */
-DEFINE_INSN
-opt_aset_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv, VALUE val)
-(VALUE val)
-{
-    if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
-	rb_hash_aset(recv, key, val);
-    }
-    else {
-	PUSH(recv);
-	PUSH(rb_str_resurrect(key));
-	PUSH(val);
-	CALL_SIMPLE_METHOD(recv);
-    }
-}
-
-/**
-  @c optimize
-  @e recv[str]
-  @j 最適化された recv[str]。
- */
-DEFINE_INSN
-opt_aref_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv)
-(VALUE val)
-{
-    if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
-	val = rb_hash_aref(recv, key);
-    }
-    else {
-	PUSH(recv);
-	PUSH(rb_str_resurrect(key));
-	CALL_SIMPLE_METHOD(recv);
-    }
-}
-
-/**
-  @c optimize
   @e optimized length
   @j 最適化された recv.length()。
  */
@@ -1957,15 +1944,15 @@ opt_length
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_LENGTH, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idLength, String)) {
 	    val = rb_str_length(recv);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_LENGTH, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idLength, Array)) {
 	    val = LONG2NUM(RARRAY_LEN(recv));
 	}
 	else if (RBASIC_CLASS(recv) == rb_cHash &&
-		 BASIC_OP_UNREDEFINED_P(BOP_LENGTH, HASH_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idLength, Hash)) {
 	    val = INT2FIX(RHASH_SIZE(recv));
 	}
 	else {
@@ -1992,15 +1979,15 @@ opt_size
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_SIZE, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idSize, String)) {
 	    val = rb_str_length(recv);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_SIZE, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idSize, Array)) {
 	    val = LONG2NUM(RARRAY_LEN(recv));
 	}
 	else if (RBASIC_CLASS(recv) == rb_cHash &&
-		 BASIC_OP_UNREDEFINED_P(BOP_SIZE, HASH_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idSize, Hash)) {
 	    val = INT2FIX(RHASH_SIZE(recv));
 	}
 	else {
@@ -2027,17 +2014,17 @@ opt_empty_p
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idEmptyP, String)) {
 	    if (RSTRING_LEN(recv) == 0) val = Qtrue;
 	    else val = Qfalse;
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idEmptyP, Array)) {
 	    if (RARRAY_LEN(recv) == 0) val = Qtrue;
 	    else val = Qfalse;
 	}
 	else if (RBASIC_CLASS(recv) == rb_cHash &&
-		 BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, HASH_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idEmptyP, Hash)) {
 	    if (RHASH_EMPTY_P(recv)) val = Qtrue;
 	    else val = Qfalse;
 	}
@@ -2065,7 +2052,7 @@ opt_succ
 {
     if (SPECIAL_CONST_P(recv)) {
 	if (FIXNUM_P(recv) &&
-	    BASIC_OP_UNREDEFINED_P(BOP_SUCC, FIXNUM_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idSucc, Fixnum)) {
 	    const VALUE obj = INT2FIX(1);
 	    /* fixnum + INT2FIX(1) */
 	    val = (recv + (obj & (~1)));
@@ -2080,11 +2067,11 @@ opt_succ
     }
     else {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_SUCC, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idSucc, String)) {
 	    val = rb_str_succ(recv);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cTime &&
-		 BASIC_OP_UNREDEFINED_P(BOP_SUCC, TIME_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idSucc, Time)) {
 	    val = rb_time_succ(recv);
 	}
 	else
@@ -2134,7 +2121,7 @@ opt_regexpmatch1
 (VALUE obj)
 (VALUE val)
 {
-    if (BASIC_OP_UNREDEFINED_P(BOP_MATCH, REGEXP_REDEFINED_OP_FLAG)) {
+    if (BASIC_OP_UNREDEFINED_P(idEqTilde, Regexp)) {
 	val = rb_reg_match(r, obj);
     }
     else {
@@ -2154,7 +2141,7 @@ opt_regexpmatch2
 (VALUE val)
 {
     if (CLASS_OF(obj2) == rb_cString &&
-	BASIC_OP_UNREDEFINED_P(BOP_MATCH, STRING_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idEqTilde, String)) {
 	val = rb_reg_match(obj1, obj2);
     }
     else {
diff --git a/template/opt_method.h.tmpl b/template/opt_method.h.tmpl
new file mode 100644
index 0000000..39c4043
--- /dev/null
+++ b/template/opt_method.h.tmpl
@@ -0,0 +1,71 @@
+/* DO NOT EDIT THIS FILE DIRECTLY: edit template/opt_method.h.tmpl instead */
+#ifndef RUBY_OPT_METHOD_H
+#define RUBY_OPT_METHOD_H
+<%
+defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def")
+eval(File.read(defs), binding, defs)
+%>
+typedef uint<%= OM_ALIGN %>_t rb_om_bitmap_t;
+
+enum ruby_optimized_method {
+<%
+opt_masks = {}
+n = 0
+OPT_METHODS.each do |(mid, *classes)|
+  classes.each do |klass|
+    if Array === klass
+      opt_masks[mid] = klass.dup
+      # we will align these in the second loop, below
+      next
+    end %>
+    <%= om(mid, klass) %> = <%= n += 1 %>,
+<%
+  end # classes.each
+end # OPT_METHODS.each
+
+# align multi-class bits so a single AND operation may
+# be byte-aligned and used to check an mid for up to 7 classes at once:
+opt_masks.each do |mid, classes|
+  # round up n to the next aligned byte slot
+  n = (n + OM_ALIGN) & OM_ALIGN_MASK
+
+  classes.each do |k|
+%>
+    <%= om(mid, k) %> = <%= n += 1 %>,
+<%=
+# we need this macro to generate shifts for the masks enums below:
+"#define #{om(mid, k)} (#{n})"
+%>
+<%
+  end # classes.each
+end # opt_masks.each
+if n >= ((1 << OM_ALIGN) - 1)
+  raise "OM_ALIGN needs to be raised to support more optimized methods"
+end
+%>
+    OM_LAST_ = <%= om_last = (n += 1) %>, /* for bitmap sizing */
+    /* special mask values below */
+<%
+# generate mask enums
+opt_masks.each do |mid, c|
+  # n.b.: negate masks to simplify the rb_opt_method_is_mask check:
+%>
+    <%= om(mid, c) %> = -(<%=
+      # pack into 16 bits so it may be a negative Fixnum
+      # 1) aligned slot offset (om / OM_ALIGN), shifted left by OM_ALIGN bits
+      # 2) mask of OM_ALIGN bits (8 or 16)
+      sep = "|\n    "
+      "/* offset: */ ((#{om(mid, c[0])} / #{OM_ALIGN}) << #{OM_ALIGN}) " \
+      "#{sep} /* mask: */ (" +
+      c.map { |k| "(1U << (#{om(mid, k)} % #{OM_ALIGN}))" }.join(sep) + # mask
+      ')'
+  %>),
+<%
+end # opt_masks.each
+%>
+    OM_ALIGN_ = <%= OM_ALIGN %>,
+    OM_SIZE_ = <%= ((om_last + OM_ALIGN) & OM_ALIGN_MASK) / OM_ALIGN %>,
+    OM_GETMASK_ = (1 << OM_ALIGN_) - 1
+};
+
+#endif /* RUBY_OPT_METHOD_H */
diff --git a/template/opt_method.inc.tmpl b/template/opt_method.inc.tmpl
new file mode 100644
index 0000000..0501121
--- /dev/null
+++ b/template/opt_method.inc.tmpl
@@ -0,0 +1,49 @@
+/* DO NOT EDIT THIS FILE DIRECTLY: edit template/opt_method.inc.tmpl instead */
+<%
+defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def")
+eval(File.read(defs), binding, defs)
+%>
+
+static void
+add_opt_method(st_table *tbl, VALUE klass, ID mid,
+		enum ruby_optimized_method om)
+{
+    rb_method_entry_t *me = rb_method_entry_at(klass, mid);
+
+    if (me && me->def && me->def->type == VM_METHOD_TYPE_CFUNC) {
+	st_insert(tbl, (st_data_t)me, (st_data_t)om);
+    }
+    else if (mid != idNeq) {
+	rb_bug("undefined optimized method: %s", rb_id2name(mid));
+    }
+}
+
+static void
+vm_init_redefined_flags(void *tbl)
+{
+<%
+OPT_METHODS.each do |(mid, *classes)|
+  classes.each do |klass|
+    if Array === klass
+      klass.each do |k|
+%>
+    add_opt_method(tbl, rb_c<%= k %>, <%= mid %>, <%= om(mid, k) %>);
+<%
+      end # klass.each
+    else
+%>
+    add_opt_method(tbl, rb_c<%= klass %>, <%= mid %>, <%= om(mid, klass) %>);
+<%  end # !(Array === klass)
+  end # classes.each
+end # OPT_METHODS.each
+%>
+}
+
+static int
+vm_redefinition_check_flag(VALUE klass)
+{
+<% opt_classes.each_key do |klass| %>
+    if (klass == rb_c<%= klass %>) return 1;
+<% end %>
+    return 0;
+}
diff --git a/test/-ext-/symbol/test_type.rb b/test/-ext-/symbol/test_type.rb
index f1749f5..5bd79b8 100644
--- a/test/-ext-/symbol/test_type.rb
+++ b/test/-ext-/symbol/test_type.rb
@@ -4,6 +4,7 @@ require "-test-/symbol"
 module Test_Symbol
   class TestType < Test::Unit::TestCase
     def test_id2str_fstring_bug9171
+      require_compile_option(:peephole_optimization)
       fstr = eval("# encoding: us-ascii
         'foobar'.freeze")
       assert_same fstr, Bug::Symbol.id2str(:foobar)
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index 8a5ed34..faacf48 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -195,6 +195,7 @@ class TestObjSpace < Test::Unit::TestCase
   end
 
   def test_dump_flags
+    require_compile_option(:peephole_optimization)
     info = ObjectSpace.dump("foo".freeze)
     assert_match /"wb_protected":true, "old":true, "long_lived":true, "marked":true/, info
     assert_match /"fstring":true/, info
diff --git a/test/ruby/envutil.rb b/test/ruby/envutil.rb
index 81b982c..e844822 100644
--- a/test/ruby/envutil.rb
+++ b/test/ruby/envutil.rb
@@ -477,6 +477,16 @@ eom
         AssertFile
       end
 
+      def require_compile_option(opt) # skip the calling test unless compile option opt is enabled
+        case RubyVM::InstructionSequence.compile_option[opt]
+        when true # enabled: fall through and let the test run
+        when false
+          skip(":#{opt} disabled")
+        else # neither true nor false: opt is not a boolean compile option
+          raise ArgumentError, "unrecognized compile option: #{opt.inspect}"
+        end
+      end
+
       class << (AssertFile = Struct.new(:failure_message).new)
         include Assertions
         def assert_file_predicate(predicate, *args)
diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index 4431552..bb7e8b5 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -216,6 +216,7 @@ class TestHash < Test::Unit::TestCase
   end
 
   def test_AREF_fstring_key
+    require_compile_option(:peephole_optimization)
     h = {"abc" => 1}
     before = GC.stat(:total_allocated_objects)
     5.times{ h["abc"] }
@@ -230,6 +231,7 @@ class TestHash < Test::Unit::TestCase
   end
 
   def test_NEWHASH_fstring_key
+    require_compile_option(:peephole_optimization)
     a = {"ABC" => :t}
     b = {"ABC" => :t}
     assert_same a.keys[0], b.keys[0]
diff --git a/test/ruby/test_iseq.rb b/test/ruby/test_iseq.rb
index 94a814c..ac1c417 100644
--- a/test/ruby/test_iseq.rb
+++ b/test/ruby/test_iseq.rb
@@ -118,6 +118,7 @@ class TestISeq < Test::Unit::TestCase
   end
 
   def test_label_fstring
+    require_compile_option(:peephole_optimization)
     c = Class.new{ def foobar() end }
 
     a, b = eval("# encoding: us-ascii\n'foobar'.freeze"),
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index d82d2bc..8d46764 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1908,6 +1908,13 @@ class TestString < Test::Unit::TestCase
     }
   end
 
+  def test_literal_freeze # calling freeze on a string literal must not allocate
+    require_compile_option(:peephole_optimization)
+    before = GC.stat(:total_allocated_objects)
+    5.times { "".freeze }
+    assert_equal before, GC.stat(:total_allocated_objects) # allocation counter unchanged
+  end
+
   class S2 < String
   end
   def test_str_new4
@@ -2272,6 +2279,192 @@ class TestString < Test::Unit::TestCase
     end;
   end if [0].pack("l!").bytesize < [nil].pack("p").bytesize
   # enable only when string size range is smaller than memory space
+
+  def test_opt_strcat_with # String#<< with a literal argument: redefinition is honored, argument costs no allocation
+    assert_separately([], <<-RUBY)
+      class String
+        undef <<
+        def <<(str)
+          "overridden"
+        end
+      end
+      assert_equal("overridden", "" << "foo")
+      foo = "foo"
+      assert_equal("overridden", foo << "bar")
+    RUBY
+
+    if @cls == String
+      nr = 10
+      recv = ""
+      before = GC.stat(:total_allocated_objects)
+      nr.times { recv << "constant" }
+      assert_equal before, GC.stat(:total_allocated_objects) # appending a literal allocated nothing
+      assert_equal "constant" * nr, recv
+
+      before = GC.stat(:total_allocated_objects)
+      nr.times { "recv" << "constant" }
+      assert_equal before + nr, GC.stat(:total_allocated_objects) # exactly one object per iteration
+    end
+  end
+
+  def test_opt_str_lit
+    assert_separately([], <<-RUBY)
+      class String
+        undef ==
+        def ==(str)
+          :TROO
+        end
+      end
+      foo = "foo"
+      assert_equal(:TROO, (foo == "foo"), 'string == "peephole 2nd pass"')
+      assert_equal(:TROO, ("foo" == foo), '"yoda 1st pass" == string')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef !=
+        def !=(str)
+          :NOT
+        end
+      end
+      foo = ""
+      assert_equal(:NOT, ("foo" != foo), '"yoda 1st pass" != string')
+      assert_equal(:NOT, (foo != "foo"), 'string != "peephole 2nd pass"')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef size
+        undef length
+        def size
+          42
+        end
+        def length
+          42
+        end
+      end
+      assert_equal(42, "".size, 'lit string size')
+      assert_equal(42, "".length, 'lit string size')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef +
+        def +(other)
+          :plus
+        end
+      end
+      foo = "a"
+      assert_equal(:plus, "" + foo, 'lit plus')
+      assert_equal(:plus, foo + "", 'plus lit')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef *
+        def *(other)
+          :mult
+        end
+      end
+      assert_equal(:mult, "x" * 3, 'lit mult')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef ===
+        def ===(other)
+          other
+        end
+      end
+      str = "y"
+      assert_equal(false, "x" === false, 'lit threequal')
+      assert_equal("x", str === "x", 'threequal lit')
+    RUBY
+
+    if @cls == String
+      nr = 10
+
+      recv = "something"
+      res = []
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv == "constant") } # opt_streq1
+      nr.times { res << ("constant" == recv) } # opt_streq2
+      nr.times { res << ("something" != recv) } # 1st pass peephole
+      nr.times { res << ("constant" == recv) } # opt_streq2
+      nr.times { res << ("constant" === recv) } # opt_streqq2
+      nr.times { res << (recv != "something") }  # 2nd pass peephole
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal [ false ], res.uniq!
+
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv == "something") } # opt_streq1
+      nr.times { res << ("something" == recv) } # opt_streq2
+      nr.times { res << ("something" === recv) } # opt_streqq2
+      nr.times { res << (recv === "something") } # opt_streqq2
+      nr.times { res << ("constant" != recv) } # 1st pass peephole
+      nr.times { res << (recv != "constant") } # 2nd pass peephole
+      nr.times { res << ("a" != "b") } # 1st pass peephole
+      nr.times { res << ("a" == "a") } # 1st pass peephole
+      nr.times { res << ("".size == 0) } # 2nd pass peephole
+      nr.times { res << ("".length == 0) } # 2nd pass peephole
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal [ true ], res.uniq!
+
+      # :+ optimizations
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << ("foo" + recv) }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_equal [ "foosomething" ], res.uniq!
+
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv + "foo") }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_equal [ "somethingfoo" ], res.uniq!
+
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << ('a' * 3) }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+      assert_equal [ "aaa" ], res.uniq!
+    end
+  end
+
+  def assert_no_new_allocations(mesg = "", adjust = 0) # adjust: number of allocations the block is allowed to make
+    before = GC.stat(:total_allocated_objects)
+    yield
+    after = GC.stat(:total_allocated_objects)
+    assert_equal before, after - adjust, mesg # passes only when exactly `adjust` objects were allocated
+  end
+
+  def test_opt_str_lit_gsub # non-matching gsub! with a literal replacement should not allocate
+    return if @cls != String # allocation counts are only checked for plain String
+    require_compile_option(:peephole_optimization)
+    foo = "foo"
+    re = /nomatch/
+    foo.gsub!(re, "00") # compile regexp
+    n = 3
+
+    assert_no_new_allocations("gsub var regexp") do
+      n.times { foo.gsub!(re, "00") }
+    end
+
+    # compiles re once:
+    assert_no_new_allocations("gsub lit regexp", 1) do # adjust = 1 permits that single allocation
+      n.times { foo.gsub!(/nomatch/, "00") }
+    end
+
+    assert_no_new_allocations("gsub literal string") do
+      n.times { foo.gsub!("nomatch", "00") }
+    end
+
+    ary = [ [ re ] ]
+    assert_no_new_allocations("bigger stack") do # pattern is fetched through nested indexing instead of a local
+      n.times { foo.gsub!(ary[0][0], "00") }
+    end
+  end
 end
 
 class TestString2 < TestString
diff --git a/vm.c b/vm.c
index 73adea4..4de83ae 100644
--- a/vm.c
+++ b/vm.c
@@ -20,6 +20,7 @@
 #include "eval_intern.h"
 #include "probes.h"
 #include "probes_helper.h"
+#include "opt_method.inc"
 
 static inline VALUE *
 VM_EP_LEP(VALUE *ep)
@@ -1134,30 +1135,16 @@ rb_iter_break_value(VALUE val)
 
 static st_table *vm_opt_method_table = 0;
 
-static int
-vm_redefinition_check_flag(VALUE klass)
-{
-    if (klass == rb_cFixnum) return FIXNUM_REDEFINED_OP_FLAG;
-    if (klass == rb_cFloat)  return FLOAT_REDEFINED_OP_FLAG;
-    if (klass == rb_cString) return STRING_REDEFINED_OP_FLAG;
-    if (klass == rb_cArray)  return ARRAY_REDEFINED_OP_FLAG;
-    if (klass == rb_cHash)   return HASH_REDEFINED_OP_FLAG;
-    if (klass == rb_cBignum) return BIGNUM_REDEFINED_OP_FLAG;
-    if (klass == rb_cSymbol) return SYMBOL_REDEFINED_OP_FLAG;
-    if (klass == rb_cTime)   return TIME_REDEFINED_OP_FLAG;
-    if (klass == rb_cRegexp) return REGEXP_REDEFINED_OP_FLAG;
-    return 0;
-}
-
 static void
 rb_vm_check_redefinition_opt_method(const rb_method_entry_t *me, VALUE klass)
 {
-    st_data_t bop;
+    st_data_t om;
     if (!me->def || me->def->type == VM_METHOD_TYPE_CFUNC) {
-	if (st_lookup(vm_opt_method_table, (st_data_t)me, &bop)) {
-	    int flag = vm_redefinition_check_flag(klass);
+	if (st_lookup(vm_opt_method_table, (st_data_t)me, &om)) {
+	    unsigned int i = om / OM_ALIGN_;
+	    rb_om_bitmap_t mask = (rb_om_bitmap_t)(1U << (om % OM_ALIGN_));
 
-	    ruby_vm_redefined_flag[bop] |= flag;
+	    ruby_vm_redefined_flag[i] |= mask;
 	}
     }
 }
@@ -1184,51 +1171,11 @@ rb_vm_check_redefinition_by_prepend(VALUE klass)
 }
 
 static void
-add_opt_method(VALUE klass, ID mid, VALUE bop)
-{
-    rb_method_entry_t *me = rb_method_entry_at(klass, mid);
-
-    if (me && me->def &&
-	me->def->type == VM_METHOD_TYPE_CFUNC) {
-	st_insert(vm_opt_method_table, (st_data_t)me, (st_data_t)bop);
-    }
-    else {
-	rb_bug("undefined optimized method: %s", rb_id2name(mid));
-    }
-}
-
-static void
 vm_init_redefined_flag(void)
 {
-    ID mid;
-    VALUE bop;
-
     vm_opt_method_table = st_init_numtable();
 
-#define OP(mid_, bop_) (mid = id##mid_, bop = BOP_##bop_, ruby_vm_redefined_flag[bop] = 0)
-#define C(k) add_opt_method(rb_c##k, mid, bop)
-    OP(PLUS, PLUS), (C(Fixnum), C(Float), C(String), C(Array));
-    OP(MINUS, MINUS), (C(Fixnum), C(Float));
-    OP(MULT, MULT), (C(Fixnum), C(Float));
-    OP(DIV, DIV), (C(Fixnum), C(Float));
-    OP(MOD, MOD), (C(Fixnum), C(Float));
-    OP(Eq, EQ), (C(Fixnum), C(Float), C(String));
-    OP(Eqq, EQQ), (C(Fixnum), C(Bignum), C(Float), C(Symbol), C(String));
-    OP(LT, LT), (C(Fixnum), C(Float));
-    OP(LE, LE), (C(Fixnum), C(Float));
-    OP(GT, GT), (C(Fixnum), C(Float));
-    OP(GE, GE), (C(Fixnum), C(Float));
-    OP(LTLT, LTLT), (C(String), C(Array));
-    OP(AREF, AREF), (C(Array), C(Hash));
-    OP(ASET, ASET), (C(Array), C(Hash));
-    OP(Length, LENGTH), (C(Array), C(String), C(Hash));
-    OP(Size, SIZE), (C(Array), C(String), C(Hash));
-    OP(EmptyP, EMPTY_P), (C(Array), C(String), C(Hash));
-    OP(Succ, SUCC), (C(Fixnum), C(String), C(Time));
-    OP(EqTilde, MATCH), (C(Regexp), C(String));
-    OP(Freeze, FREEZE), (C(String));
-#undef C
-#undef OP
+    vm_init_redefined_flags(vm_opt_method_table); /* opt_method.inc.tmpl */
 }
 
 /* for vm development */
diff --git a/vm_core.h b/vm_core.h
index 3f1ddc8..10281ef 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -24,6 +24,7 @@
 #include "method.h"
 #include "ruby_atomic.h"
 #include "ccan/list/list.h"
+#include "opt_method.h"
 
 #include "ruby/thread_native.h"
 #if   defined(_WIN32)
@@ -320,33 +321,6 @@ enum ruby_special_exceptions {
     ruby_special_error_count
 };
 
-enum ruby_basic_operators {
-    BOP_PLUS,
-    BOP_MINUS,
-    BOP_MULT,
-    BOP_DIV,
-    BOP_MOD,
-    BOP_EQ,
-    BOP_EQQ,
-    BOP_LT,
-    BOP_LE,
-    BOP_LTLT,
-    BOP_AREF,
-    BOP_ASET,
-    BOP_LENGTH,
-    BOP_SIZE,
-    BOP_EMPTY_P,
-    BOP_SUCC,
-    BOP_GT,
-    BOP_GE,
-    BOP_NOT,
-    BOP_NEQ,
-    BOP_MATCH,
-    BOP_FREEZE,
-
-    BOP_LAST_
-};
-
 #define GetVMPtr(obj, ptr) \
   GetCoreDataFromValue((obj), rb_vm_t, (ptr))
 
@@ -441,7 +415,7 @@ typedef struct rb_vm_struct {
 	size_t fiber_machine_stack_size;
     } default_params;
 
-    short redefined_flag[BOP_LAST_];
+    rb_om_bitmap_t redefined_flag[OM_SIZE_];
 } rb_vm_t;
 
 /* default values */
@@ -458,18 +432,8 @@ typedef struct rb_vm_struct {
 #define RUBY_VM_FIBER_MACHINE_STACK_SIZE      (  64 * 1024 * sizeof(VALUE)) /*  256 KB or  512 KB */
 #define RUBY_VM_FIBER_MACHINE_STACK_SIZE_MIN  (  16 * 1024 * sizeof(VALUE)) /*   64 KB or  128 KB */
 
-/* optimize insn */
-#define FIXNUM_REDEFINED_OP_FLAG (1 << 0)
-#define FLOAT_REDEFINED_OP_FLAG  (1 << 1)
-#define STRING_REDEFINED_OP_FLAG (1 << 2)
-#define ARRAY_REDEFINED_OP_FLAG  (1 << 3)
-#define HASH_REDEFINED_OP_FLAG   (1 << 4)
-#define BIGNUM_REDEFINED_OP_FLAG (1 << 5)
-#define SYMBOL_REDEFINED_OP_FLAG (1 << 6)
-#define TIME_REDEFINED_OP_FLAG   (1 << 7)
-#define REGEXP_REDEFINED_OP_FLAG (1 << 8)
-
-#define BASIC_OP_UNREDEFINED_P(op, klass) (LIKELY((GET_VM()->redefined_flag[(op)]&(klass)) == 0))
+#define BASIC_OP_UNREDEFINED_P(mid, klass) \
+	rb_basic_op_unredefined_p(OM_##mid##__##klass) /* e.g. (idEq, String) pastes to OM_idEq__String */
 
 #ifndef VM_DEBUG_BP_CHECK
 #define VM_DEBUG_BP_CHECK 0
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 05ed3c6..2aedb46 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -872,17 +872,17 @@ VALUE
 opt_eq_func(VALUE recv, VALUE obj, CALL_INFO ci)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_EQ, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idEq, Fixnum)) {
 	return (recv == obj) ? Qtrue : Qfalse;
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_EQ, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idEq, Float)) {
 	return (recv == obj) ? Qtrue : Qfalse;
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat &&
 	    RBASIC_CLASS(obj) == rb_cFloat &&
-	    BASIC_OP_UNREDEFINED_P(BOP_EQ, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idEq, Float)) {
 	    double a = RFLOAT_VALUE(recv);
 	    double b = RFLOAT_VALUE(obj);
 
@@ -893,7 +893,7 @@ opt_eq_func(VALUE recv, VALUE obj, CALL_INFO ci)
 	}
 	else if (RBASIC_CLASS(recv) == rb_cString &&
 		 RBASIC_CLASS(obj) == rb_cString &&
-		 BASIC_OP_UNREDEFINED_P(BOP_EQ, STRING_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idEq, String)) {
 	    return rb_str_equal(recv, obj);
 	}
     }
diff --git a/vm_insnhelper.h b/vm_insnhelper.h
index 31f8ffc..a4290ee 100644
--- a/vm_insnhelper.h
+++ b/vm_insnhelper.h
@@ -229,5 +229,28 @@ enum vm_regan_acttype {
 static VALUE make_no_method_exception(VALUE exc, const char *format,
 				      VALUE obj, int argc, const VALUE *argv);
 
-
+static inline int
+rb_basic_op_unredefined_p(enum ruby_optimized_method om)
+{
+    unsigned int i = om / OM_ALIGN_; /* index of the redefined_flag word holding om's bit */
+    rb_om_bitmap_t mask = (rb_om_bitmap_t)(1U << (om % OM_ALIGN_)); /* om's bit within that word */
+
+    return LIKELY((GET_VM()->redefined_flag[i] & mask) == 0); /* nonzero while that bit is clear */
+}
+
+static inline int
+rb_basic_mask_unredefined_p(enum ruby_optimized_method om)
+{
+    unsigned int uom = (unsigned int)-om; /* om is negated before it is decoded */
+    unsigned int offset = 0xffU & (uom >> OM_ALIGN_); /* bits above OM_ALIGN_: index into redefined_flag */
+    rb_om_bitmap_t mask = (rb_om_bitmap_t)(OM_GETMASK_ & uom); /* low OM_ALIGN_ bits: the bits to test */
+
+    return LIKELY((GET_VM()->redefined_flag[offset] & mask) == 0); /* nonzero only if every masked bit is clear */
+}
+
+static inline int
+rb_opt_method_is_mask(enum ruby_optimized_method om)
+{
+    return (int)om < 0; /* 1 when om, viewed as int, is negative; 0 otherwise */
+}
 #endif /* RUBY_INSNHELPER_H */

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2014-10-14  2:42 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-14  2:42 [PATCH] opt_str_lit: one instruction, many optimizations Eric Wong
  -- strict thread matches above, loose matches on Subject: below --
2014-10-10  0:46 Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).