dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [RFC] opt_str_lit: optimize string literals in many cases
@ 2014-10-18  2:51 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2014-10-18  2:51 UTC (permalink / raw)
  To: spew

Note: I am considering splitting the opt_str_lit instruction into
3 instructions to avoid extra branching at runtime.

---
 benchmark/bm_vm2_array_delete_lit.rb  |   6 +
 benchmark/bm_vm2_array_include_lit.rb |   6 +
 benchmark/bm_vm2_gsub_bang_lit.rb     |   6 +
 benchmark/bm_vm2_gsub_bang_re.rb      |   6 +
 benchmark/bm_vm2_gsub_re.rb           |   6 +
 benchmark/bm_vm2_hash_aref_lit.rb     |   6 +
 benchmark/bm_vm2_hash_aset_lit.rb     |   6 +
 benchmark/bm_vm2_hash_delete_lit.rb   |   6 +
 benchmark/bm_vm2_set_include_lit.rb   |   7 +
 benchmark/bm_vm2_str_delete.rb        |   6 +
 benchmark/bm_vm2_strcat.rb            |   7 +
 benchmark/bm_vm2_streq1.rb            |   6 +
 benchmark/bm_vm2_streq2.rb            |   6 +
 benchmark/bm_vm2_streqq1.rb           |   6 +
 benchmark/bm_vm2_streqq2.rb           |   6 +
 benchmark/bm_vm2_strfmt.rb            |   5 +
 benchmark/bm_vm2_strplus1.rb          |   6 +
 benchmark/bm_vm2_strplus2.rb          |   6 +
 benchmark/bm_vm2_tr_bang.rb           |   7 +
 common.mk                             |  18 +-
 compile.c                             | 329 ++++++++++++++++++++++++++++++----
 defs/id.def                           |  35 ++++
 defs/opt_method.def                   |  87 +++++++++
 insns.def                             | 250 ++++++++++++++------------
 template/opt_method.h.tmpl            | 111 ++++++++++++
 template/opt_method.inc.tmpl          |  42 +++++
 test/-ext-/symbol/test_type.rb        |   1 +
 test/objspace/test_objspace.rb        |   1 +
 test/ruby/envutil.rb                  |  10 ++
 test/ruby/test_hash.rb                |   2 +
 test/ruby/test_iseq.rb                |   1 +
 test/ruby/test_optimization.rb        |  29 +++
 test/ruby/test_string.rb              | 159 ++++++++++++++++
 vm.c                                  |  67 +------
 vm_core.h                             |  44 +----
 vm_insnhelper.c                       |   8 +-
 vm_insnhelper.h                       |  27 +++
 37 files changed, 1076 insertions(+), 261 deletions(-)

diff --git a/benchmark/bm_vm2_array_delete_lit.rb b/benchmark/bm_vm2_array_delete_lit.rb
new file mode 100644
index 0000000..60d599a
--- /dev/null
+++ b/benchmark/bm_vm2_array_delete_lit.rb
@@ -0,0 +1,6 @@
+ary = []
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  ary.delete("foo")
+end
diff --git a/benchmark/bm_vm2_array_include_lit.rb b/benchmark/bm_vm2_array_include_lit.rb
new file mode 100644
index 0000000..c81e230
--- /dev/null
+++ b/benchmark/bm_vm2_array_include_lit.rb
@@ -0,0 +1,6 @@
+ary = []
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  ary.include?("foo")
+end
diff --git a/benchmark/bm_vm2_gsub_bang_lit.rb b/benchmark/bm_vm2_gsub_bang_lit.rb
new file mode 100644
index 0000000..9251fb1
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_bang_lit.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.gsub!("nomatch", "")
+end
diff --git a/benchmark/bm_vm2_gsub_bang_re.rb b/benchmark/bm_vm2_gsub_bang_re.rb
new file mode 100644
index 0000000..e5fc9ea
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_bang_re.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.gsub!(/a/, "")
+end
diff --git a/benchmark/bm_vm2_gsub_re.rb b/benchmark/bm_vm2_gsub_re.rb
new file mode 100644
index 0000000..606f247
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_re.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.gsub(/a/, "")
+end
diff --git a/benchmark/bm_vm2_hash_aref_lit.rb b/benchmark/bm_vm2_hash_aref_lit.rb
new file mode 100644
index 0000000..a6d4d12
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aref_lit.rb
@@ -0,0 +1,6 @@
+h = { "foo" => nil }
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h["foo"]
+end
diff --git a/benchmark/bm_vm2_hash_aset_lit.rb b/benchmark/bm_vm2_hash_aset_lit.rb
new file mode 100644
index 0000000..58339ec
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aset_lit.rb
@@ -0,0 +1,6 @@
+h = {}
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h["foo"] = nil
+end
diff --git a/benchmark/bm_vm2_hash_delete_lit.rb b/benchmark/bm_vm2_hash_delete_lit.rb
new file mode 100644
index 0000000..22dd95f
--- /dev/null
+++ b/benchmark/bm_vm2_hash_delete_lit.rb
@@ -0,0 +1,6 @@
+h = {}
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h.delete("foo")
+end
diff --git a/benchmark/bm_vm2_set_include_lit.rb b/benchmark/bm_vm2_set_include_lit.rb
new file mode 100644
index 0000000..25d8b89
--- /dev/null
+++ b/benchmark/bm_vm2_set_include_lit.rb
@@ -0,0 +1,7 @@
+require 'set'
+set = Set.new
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  set.include?("foo")
+end
diff --git a/benchmark/bm_vm2_str_delete.rb b/benchmark/bm_vm2_str_delete.rb
new file mode 100644
index 0000000..c242f29
--- /dev/null
+++ b/benchmark/bm_vm2_str_delete.rb
@@ -0,0 +1,6 @@
+str = ''
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  str.delete("foo")
+end
diff --git a/benchmark/bm_vm2_strcat.rb b/benchmark/bm_vm2_strcat.rb
new file mode 100644
index 0000000..b25ac6e
--- /dev/null
+++ b/benchmark/bm_vm2_strcat.rb
@@ -0,0 +1,7 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str << "const"
+  str.clear
+end
diff --git a/benchmark/bm_vm2_streq1.rb b/benchmark/bm_vm2_streq1.rb
new file mode 100644
index 0000000..2a4b0f8
--- /dev/null
+++ b/benchmark/bm_vm2_streq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo == "literal"
+end
diff --git a/benchmark/bm_vm2_streq2.rb b/benchmark/bm_vm2_streq2.rb
new file mode 100644
index 0000000..986020d
--- /dev/null
+++ b/benchmark/bm_vm2_streq2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "literal" == foo
+end
diff --git a/benchmark/bm_vm2_streqq1.rb b/benchmark/bm_vm2_streqq1.rb
new file mode 100644
index 0000000..9183466
--- /dev/null
+++ b/benchmark/bm_vm2_streqq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo === "literal"
+end
diff --git a/benchmark/bm_vm2_streqq2.rb b/benchmark/bm_vm2_streqq2.rb
new file mode 100644
index 0000000..f48a9cd
--- /dev/null
+++ b/benchmark/bm_vm2_streqq2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "literal" === foo
+end
diff --git a/benchmark/bm_vm2_strfmt.rb b/benchmark/bm_vm2_strfmt.rb
new file mode 100644
index 0000000..efb88b6
--- /dev/null
+++ b/benchmark/bm_vm2_strfmt.rb
@@ -0,0 +1,5 @@
+i = 0
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "%d" % i
+end
diff --git a/benchmark/bm_vm2_strplus1.rb b/benchmark/bm_vm2_strplus1.rb
new file mode 100644
index 0000000..714efb8
--- /dev/null
+++ b/benchmark/bm_vm2_strplus1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo + "b"
+end
diff --git a/benchmark/bm_vm2_strplus2.rb b/benchmark/bm_vm2_strplus2.rb
new file mode 100644
index 0000000..c7f91ed
--- /dev/null
+++ b/benchmark/bm_vm2_strplus2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  "b" + foo
+end
diff --git a/benchmark/bm_vm2_tr_bang.rb b/benchmark/bm_vm2_tr_bang.rb
new file mode 100644
index 0000000..8065a65
--- /dev/null
+++ b/benchmark/bm_vm2_tr_bang.rb
@@ -0,0 +1,7 @@
+i = 0
+str = "a"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str.tr!("a", "A")
+  str.tr!("A", "a")
+end
diff --git a/common.mk b/common.mk
index ce01aca..0a533a5 100644
--- a/common.mk
+++ b/common.mk
@@ -639,7 +639,7 @@ PROBES_H_INCLUDES  = {$(VPATH)}probes.h
 VM_CORE_H_INCLUDES = {$(VPATH)}vm_core.h {$(VPATH)}thread_$(THREAD_MODEL).h \
 		     {$(VPATH)}node.h {$(VPATH)}method.h {$(VPATH)}ruby_atomic.h \
 	             {$(VPATH)}vm_debug.h {$(VPATH)}id.h {$(VPATH)}thread_native.h \
-	             $(CCAN_LIST_INCLUDES)
+	             $(CCAN_LIST_INCLUDES) {$(VPATH)}opt_method.h
 
 ###
 
@@ -826,7 +826,7 @@ vm.$(OBJEXT): {$(VPATH)}vm.c {$(VPATH)}gc.h {$(VPATH)}iseq.h \
   $(VM_CORE_H_INCLUDES) {$(VPATH)}vm_method.c {$(VPATH)}vm_eval.c \
   {$(VPATH)}vm_insnhelper.c {$(VPATH)}vm_insnhelper.h {$(VPATH)}vm_exec.c \
   {$(VPATH)}vm_exec.h {$(VPATH)}insns.def {$(VPATH)}vmtc.inc \
-  {$(VPATH)}vm.inc {$(VPATH)}insns.inc \
+  {$(VPATH)}vm.inc {$(VPATH)}insns.inc {$(VPATH)}opt_method.inc \
   {$(VPATH)}internal.h {$(VPATH)}vm.h {$(VPATH)}constant.h \
   $(PROBES_H_INCLUDES) {$(VPATH)}probes_helper.h {$(VPATH)}vm_opts.h
 vm_dump.$(OBJEXT): {$(VPATH)}vm_dump.c $(RUBY_H_INCLUDES) \
@@ -931,6 +931,20 @@ incs: $(INSNS) {$(VPATH)}node_name.inc {$(VPATH)}encdb.h {$(VPATH)}transdb.h {$(
 
 insns: $(INSNS)
 
+opt_method.h: $(srcdir)/tool/generic_erb.rb \
+		$(srcdir)/template/opt_method.h.tmpl \
+		$(srcdir)/defs/opt_method.def
+	$(ECHO) generating $@
+	$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
+		$(srcdir)/template/opt_method.h.tmpl
+
+opt_method.inc: $(srcdir)/tool/generic_erb.rb \
+		$(srcdir)/template/opt_method.inc.tmpl \
+		$(srcdir)/defs/opt_method.def
+	$(ECHO) generating $@
+	$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
+		$(srcdir)/template/opt_method.inc.tmpl
+
 id.h: $(srcdir)/tool/generic_erb.rb $(srcdir)/template/id.h.tmpl $(srcdir)/defs/id.def
 	$(ECHO) generating $@
 	$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
diff --git a/compile.c b/compile.c
index 8df7acf..d371985 100644
--- a/compile.c
+++ b/compile.c
@@ -1703,6 +1703,162 @@ get_prev_insn(INSN *iobj)
     return 0;
 }
 
+#define new_recvinfo_for_put(iseq,str,mid,klass) \
+    new_recvinfo_for_put_(iseq,str,OM_##mid##__##klass)
+static VALUE
+new_recvinfo_for_put_(rb_iseq_t *iseq, VALUE str, enum ruby_optimized_method om)
+{
+    VALUE ri = rb_ary_new_from_args(2, str, INT2FIX(om));
+
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
+}
+
+#define new_recvinfo_for_call(iseq,str,mid,klass) \
+    new_recvinfo_for_call_((iseq),(str),OM_##mid##__##klass,(mid))
+static VALUE
+new_recvinfo_for_call_(rb_iseq_t *iseq, VALUE str,
+		    enum ruby_optimized_method om, ID mid)
+{
+    VALUE ri = rb_ary_new_from_args(3, str, INT2FIX(om), ID2SYM(mid));
+
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
+}
+
+#define new_recvinfo_for_arg(iseq,str,mid,klass,off) \
+    new_recvinfo_for_arg_((iseq),(str),(OM_##mid##__##klass),\
+                          (OM_TMASK_##klass),(off))
+static VALUE
+new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str,
+		enum ruby_optimized_method om,
+		VALUE tmask, int recv_off)
+{
+    VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(om),
+				    tmask, INT2FIX(recv_off));
+
+    hide_obj(ri);
+    iseq_add_mark_object(iseq, ri);
+
+    return ri;
+}
+
+/*
+ * optimize allocation:
+ *   hash["lit"] # hash lookups
+ *   str == "lit"
+ *   str != "lit"
+ *   str << "lit"
+ *   str + "lit"
+ *   str === "lit"
+ */
+static VALUE
+opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
+{
+    enum ruby_optimized_method om;
+    VALUE tmask;
+
+    switch (ci->mid) {
+#define C(mid,klass) \
+  case mid: \
+    om = OM_##mid##__##klass; \
+    tmask = OM_TMASK_##klass; \
+    break
+      C(idAREF, Hash);
+      C(idEq, String);
+      C(idNeq, String);
+      C(idLTLT, String);
+      C(idPLUS, String);
+      C(idEqq, String);
+      C(idDelete, Array_Hash_String);
+      C(idIncludeP, Array_Hash_String);
+      C(idMemberP, Hash);
+      C(idHas_keyP, Hash);
+      C(idKeyP, Hash);
+      C(idStrftime, Time);
+      C(idPack, Array);
+      C(idUnpack, String);
+      C(idSplit, String); /* TODO: str.split("lit", num) */
+      C(idJoin, Array);
+      C(idCount, String);
+      C(idChomp, String);
+      C(idChomp_bang, String);
+      C(idSqueeze, String);
+      C(idSqueeze_bang, String);
+      C(idDelete_bang, String);
+      C(idEncode, String);
+      C(idEncode_bang, String);
+      C(idForce_encoding, String);
+      C(idIndex, String); /* TODO: str.index("lit", num) */
+      C(idRindex, String);
+      C(idMatch, String);
+      C(idCasecmp, String);
+      C(idStart_withP, String);
+      C(idEnd_withP, String);
+#undef C
+      default: return Qfalse;
+    }
+
+    return new_recvinfo_for_arg_(iseq, str, om, tmask, 0);
+}
+
+/*
+ * optimize common 2-arg calls whose last arg is a string literal:
+ *   foo.sub(/../, "to")
+ *   foo.sub!(/../, "to")
+ *   foo.gsub(/../, "to")
+ *   foo.gsub!(/../, "to")
+ *   foo.tr("from", "to")
+ *   foo.tr!("from", "to")
+ *   foo.tr_s("from", "to")
+ *   foo.tr_s!("from", "to")
+ */
+static VALUE
+opt_str_lit_2(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
+{
+    INSN *piobj;
+    enum ruby_optimized_method om;
+
+    switch (ci->mid) {
+#define C(mid) case mid: om = OM_##mid##__String; break
+      C(idSub);
+      C(idSub_bang);
+      C(idGsub);
+      C(idGsub_bang);
+      C(idTr);
+      C(idTr_bang);
+      C(idTr_s);
+      C(idTr_s_bang);
+      C(idInsert); /* String#insert(num, "lit") */
+
+      /* String#encode("dst", "src") */
+      C(idEncode);
+      C(idEncode_bang);
+#undef C
+      default: return Qfalse;
+    }
+
+    /*
+     * previous arg may be a string literal, too:
+     *   foo.gsub!("from", "to")
+     *   foo.tr!("from", "to")
+     *   ..
+     */
+    piobj = (INSN *)get_prev_insn(list);
+    if (piobj && piobj->insn_id == BIN(putstring)) {
+	VALUE pstr = piobj->operands[0];
+	VALUE pri = new_recvinfo_for_arg_(iseq, pstr, om, OM_TMASK_String, 0);
+	piobj->operands[0] = pri;
+	piobj->insn_id = BIN(opt_str_lit);
+    }
+
+    return new_recvinfo_for_arg_(iseq, str, om, OM_TMASK_String, 1);
+}
+
 static int
 iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcallopt)
 {
@@ -1819,6 +1975,54 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 	    }
 	}
     }
+
+    /* string literal optimizations */
+    if (iobj->insn_id == BIN(putstring)) {
+	INSN *niobj = (INSN *)get_next_insn((INSN *)list);
+
+	if (niobj && niobj->insn_id == BIN(send)) {
+	    rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
+
+	    if (!ci->blockiseq && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) {
+		VALUE ri = Qfalse;
+		VALUE str = iobj->operands[0];
+
+		switch (ci->orig_argc) {
+		  case 0:
+		    /*
+		     * optimize:
+		     * "literal".freeze
+		     * "literal".size
+		     * "literal".length
+		     */
+		    switch (ci->mid) {
+		      case idFreeze:
+			ri = new_recvinfo_for_call(iseq, str, idFreeze, String);
+			REMOVE_ELEM((LINK_ELEMENT *)niobj);
+			break;
+		      case idSize:
+			ri = new_recvinfo_for_put(iseq, str, idSize, String);
+			break;
+		      case idLength:
+			ri = new_recvinfo_for_put(iseq, str, idLength, String);
+			break;
+		    }
+		    break;
+		  case 1:
+		    ri = opt_str_lit_1(iseq, str, ci, (INSN *)list);
+		    break;
+		  case 2:
+		    ri = opt_str_lit_2(iseq, str, ci, (INSN *)list);
+		    break;
+		}
+		if (ri != Qfalse) {
+		    iobj->insn_id = BIN(opt_str_lit);
+		    iobj->operands[0] = ri;
+		}
+	    }
+	}
+    }
+
     return COMPILE_OK;
 }
 
@@ -3096,6 +3300,20 @@ build_postexe_iseq(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *body)
     return Qnil;
 }
 
+static enum ruby_optimized_method
+opt_str_lit_recv_om(ID mid)
+{
+    switch (mid) {
+      case idEq: return OM_idEq__String;
+      case idNeq: return OM_idNeq__String;
+      case idPLUS: return OM_idPLUS__String;
+      case idMULT: return OM_idMULT__String;
+      case idMOD: return OM_idMOD__String;
+      case idEqq: return OM_idEqq__String;
+    }
+    return OM_LAST_;
+}
+
 /**
   compile each node
 
@@ -4238,37 +4456,6 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	break;
       }
       case NODE_CALL:
-	/* optimization shortcut
-	 *   "literal".freeze -> opt_str_freeze("literal")
-	 */
-	if (node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
-	    node->nd_mid == idFreeze && node->nd_args == NULL)
-	{
-	    VALUE str = rb_fstring(node->nd_recv->nd_lit);
-	    iseq_add_mark_object(iseq, str);
-	    ADD_INSN1(ret, line, opt_str_freeze, str);
-	    if (poped) {
-		ADD_INSN(ret, line, pop);
-	    }
-	    break;
-	}
-	/* optimization shortcut
-	 *   obj["literal"] -> opt_aref_with(obj, "literal")
-	 */
-	if (node->nd_mid == idAREF && !private_recv_p(node) && node->nd_args &&
-	    nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 1 &&
-	    nd_type(node->nd_args->nd_head) == NODE_STR)
-	{
-	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
-	    node->nd_args->nd_head->nd_lit = str;
-	    COMPILE(ret, "recv", node->nd_recv);
-	    ADD_INSN2(ret, line, opt_aref_with,
-		      new_callinfo(iseq, idAREF, 1, 0, 0), str);
-	    if (poped) {
-		ADD_INSN(ret, line, pop);
-	    }
-	    break;
-	}
       case NODE_FCALL:
       case NODE_VCALL:{		/* VCALL: variable or call */
 	/*
@@ -4352,7 +4539,30 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 #endif
 	/* receiver */
 	if (type == NODE_CALL) {
-	    COMPILE(recv, "recv", node->nd_recv);
+	    enum ruby_optimized_method om;
+	    /*
+	     * optimize:
+	     *   "yoda" == other -> opt_str_lit("yoda").send(:==, other)
+	     *   "yoda" != other -> opt_str_lit("yoda").send(:!=, other)
+	     *   "str" + other -> opt_str_lit("str").send(:+, other)
+	     *   "str" * other -> opt_str_lit("str").send(:*, other)
+	     *   "fmt" % args -> opt_str_lit("fmt").send(:%, args)
+	     */
+	    if (iseq->compile_data->option->peephole_optimization &&
+		((om = opt_str_lit_recv_om(mid)) != OM_LAST_) &&
+		!private_recv_p(node) &&
+		node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
+		node->nd_args && nd_type(node->nd_args) == NODE_ARRAY &&
+		node->nd_args->nd_alen == 1)
+	    {
+		VALUE yoda = rb_fstring(node->nd_recv->nd_lit);
+		VALUE recv_info = new_recvinfo_for_put_(iseq, yoda, om);
+
+		node->nd_recv->nd_lit = yoda;
+		ADD_INSN1(recv, line, opt_str_lit, recv_info);
+	    } else {
+		COMPILE(recv, "recv", node->nd_recv);
+	    }
 	}
 	else if (type == NODE_FCALL || type == NODE_VCALL) {
 	    ADD_CALL_RECEIVER(recv, line);
@@ -5241,23 +5451,31 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	int asgnflag;
 
 	/* optimization shortcut
-	 *   obj["literal"] = value -> opt_aset_with(obj, "literal", value)
+	 *   obj["literal"] = val -> send(obj, :[]=, opt_str_lit("lit"), val)
+	 * TODO: ideally this should be done inside iseq_peephole_optimize,
+	 * but that would require a lot of scanning as the `val' (2nd arg)
+	 * is of variable distance between the :putstring and :send insns
 	 */
-	if (node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
+	if (iseq->compile_data->option->peephole_optimization &&
+	    node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
 	    nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 2 &&
 	    nd_type(node->nd_args->nd_head) == NODE_STR)
 	{
 	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
+	    VALUE recv_info = new_recvinfo_for_arg(iseq, str, idASET, Hash, 0);
+
 	    node->nd_args->nd_head->nd_lit = str;
-	    iseq_add_mark_object(iseq, str);
+	    if (!poped) {
+		ADD_INSN(ret, line, putnil);
+	    }
 	    COMPILE(ret, "recv", node->nd_recv);
+	    ADD_INSN1(ret, line, opt_str_lit, recv_info);
 	    COMPILE(ret, "value", node->nd_args->nd_next->nd_head);
 	    if (!poped) {
-		ADD_INSN(ret, line, swap);
-		ADD_INSN1(ret, line, topn, INT2FIX(1));
+		ADD_INSN1(ret, line, setn, INT2FIX(3));
 	    }
-	    ADD_INSN2(ret, line, opt_aset_with,
-		      new_callinfo(iseq, idASET, 2, 0, 0), str);
+	    flag = VM_CALL_ARGS_SKIP_SETUP;
+	    ADD_SEND_R(ret, line, node->nd_mid, 2, 0, INT2FIX(flag));
 	    ADD_INSN(ret, line, pop);
 	    break;
 	}
@@ -5906,3 +6124,36 @@ rb_parse_in_main(void)
 {
     return GET_THREAD()->parse_in_eval < 0;
 }
+
+/*
+ * Live bytecode patch:
+ *   - opt_str_lit(recv_info)
+ *   + putstring(str) # str is recv_info[0]
+ *
+ * If allocation optimization fails at this call site once, assume it
+ * will fail in the future.  This prevents performance regressions for
+ * things like #include? calls which may be used with unoptimized
+ * classes (Set,*DBM and many others) as well as optimized core classes
+ * (Array/Hash/String).  Call sites which only use optimized core
+ * classes will never get here.
+ */
+void
+rb_undo_opt_str_lit(rb_control_frame_t *cfp)
+{
+    VALUE *insn = cfp->pc - insn_len(BIN(opt_str_lit));
+
+#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
+    const void * const *table = rb_vm_get_insns_address_table();
+
+    assert((VALUE)table[BIN(opt_str_lit)] == insn[0] && "mismatch");
+    insn[0] = (VALUE)table[BIN(putstring)];
+#else
+    assert((VALUE)BIN(opt_str_lit) == insn[0] && "mismatch");
+    insn[0] = (VALUE)BIN(putstring);
+#endif
+    assert(insn_len(BIN(opt_str_lit)) == insn_len(BIN(putstring)));
+    assert(T_ARRAY == BUILTIN_TYPE(insn[1]));
+
+    /* n.b.: recv_info remains marked */
+    insn[1] = RARRAY_AREF(insn[1], 0); /* recv_info[0] == str */
+}
diff --git a/defs/id.def b/defs/id.def
index f7fffbd..43f87c7 100644
--- a/defs/id.def
+++ b/defs/id.def
@@ -57,6 +57,40 @@ firstline, predefined = __LINE__+1, %[\
   core#hash_merge_ary
   core#hash_merge_ptr
   core#hash_merge_kwd
+  gsub
+  gsub!
+  sub
+  sub!
+  tr
+  tr!
+  tr_s
+  tr_s!
+  delete
+  delete!
+  include?
+  member?
+  has_key?
+  key?
+  count
+  chomp
+  chomp!
+  squeeze
+  squeeze!
+  strftime
+  pack
+  unpack
+  split
+  join
+  encode
+  encode!
+  force_encoding
+  index
+  rindex
+  match
+  casecmp
+  insert
+  start_with?
+  end_with?
 ]
 
 class KeywordError < RuntimeError
@@ -83,6 +117,7 @@ predefined.split(/^/).each_with_index do |line, num|
     token = "_#{token.gsub(/\W+/, '_')}"
   else
     token = token.sub(/\?/, 'P').sub(/\A[a-z]/) {$&.upcase}
+    token.sub!(/!\z/, "_bang")
     token.sub!(/\A\$/, "_G_")
     token.sub!(/\A@@/, "_C_")
     token.sub!(/\A@/, "_I_")
diff --git a/defs/opt_method.def b/defs/opt_method.def
new file mode 100644
index 0000000..4aa2c69
--- /dev/null
+++ b/defs/opt_method.def
@@ -0,0 +1,87 @@
+# byte align the bitmap for now, maybe some arches do better with long or int
+# we may also use a larger size (in the unlikely case) we need more than
+# 7 optimized classes per mid.   Currently this caps us to 256 optimized
+# (mid, klass) combinations (tested with OM_SHIFT=4, giving us 64K)
+OM_SHIFT = 3
+OM_ALIGN = 1 << OM_SHIFT
+OM_ALIGN_MASK = ~(OM_ALIGN - 1)
+OPT_METHODS = [
+  %w(idPLUS Fixnum Float String Array),
+  %w(idMINUS Fixnum Float),
+  %w(idMULT Fixnum Float String),
+  %w(idDIV Fixnum Float),
+  %w(idMOD Fixnum Float String),
+  %w(idEq Fixnum Float String),
+  %w(idNeq Fixnum Float String),
+  # id, mask classes
+  [ 'idEqq', %w(Bignum Fixnum Float Symbol), *%w(String) ],
+  %w(idLT Fixnum Float),
+  %w(idLE Fixnum Float),
+  %w(idGT Fixnum Float),
+  %w(idGE Fixnum Float),
+  %w(idLTLT String Array),
+  %w(idAREF Array Hash),
+  %w(idASET Array Hash),
+  %w(idLength Array String Hash),
+  %w(idSize Array String Hash),
+  %w(idEmptyP Array String Hash),
+  %w(idSucc Fixnum String Time),
+  %w(idEqTilde Regexp String),
+  %w(idFreeze String),
+  %w(idGsub String),
+  %w(idGsub_bang String),
+  %w(idSub String),
+  %w(idSub_bang String),
+  %w(idTr String),
+  %w(idTr_bang String),
+  %w(idTr_s String),
+  %w(idTr_s_bang String),
+  [ "idDelete", %w(Array Hash String) ],
+  [ "idIncludeP", %w(Array Hash String) ],
+  %w(idMemberP Hash),
+  %w(idKeyP Hash),
+  %w(idHas_keyP Hash),
+  %w(idStrftime Time),
+  %w(idUnpack String),
+  %w(idPack Array),
+  %w(idSplit String),
+  %w(idJoin Array),
+  %w(idCount String),
+  %w(idChomp String),
+  %w(idChomp_bang String),
+  %w(idSqueeze String),
+  %w(idSqueeze_bang String),
+  %w(idDelete_bang String),
+  %w(idEncode String),
+  %w(idEncode_bang String),
+  %w(idForce_encoding String),
+  %w(idIndex String),
+  %w(idRindex String),
+  %w(idMatch String),
+  %w(idCasecmp String),
+  %w(idInsert String),
+  %w(idStart_withP String),
+  %w(idEnd_withP String),
+]
+
+# for checking optimized classes,
+# speeds up method definitions of non-core classes
+def opt_classes
+  rv = {}
+  OPT_METHODS.each do |(_, *classes)|
+    classes.flatten.each { |c| rv[c] = true }
+  end
+  rv
+end
+
+def om(mid, klass)
+  if Array === klass
+    "OM_#{mid}__#{klass.join('_')}"
+  else
+    "OM_#{mid}__#{klass}"
+  end
+end
+
+IS_T_DATA = {
+  "Time" => true
+}
diff --git a/insns.def b/insns.def
index bfa11a9..8855e06 100644
--- a/insns.def
+++ b/insns.def
@@ -356,6 +356,92 @@ putstring
 
 /**
   @c put
+  @e put string val. string may be created depending on recv_info conditions
+ */
+DEFINE_INSN
+opt_str_lit
+(VALUE recv_info)
+()
+(VALUE val)
+{
+    /*
+     * recv_info:
+     * 0 - str
+     * 1 - optimized method flag (OM_*)
+     * optional:
+     * 2 - class, tmask (optimized receiver classes) or Symbol (method name)
+     * 3 - stack offset (Fixnum), only present if [2] is a tmask or Class,
+     *     -1 stack offset means receiver is the frozen string literal itself
+     */
+    const VALUE *ri = RARRAY_CONST_PTR(recv_info);
+    long len = RARRAY_LEN(recv_info);
+    enum ruby_optimized_method om = FIX2UINT(ri[1]);
+
+    val = ri[0]; /* hopefully, this is the only val assignment we need */
+
+    if (len > 2) {
+	VALUE cmask = ri[2];
+
+	switch (TYPE(cmask)) {
+	  case RUBY_T_FIXNUM: { /* tmask, most cases */
+	    int n = FIX2INT(ri[3]);
+	    VALUE recv = n < 0 ? val : TOPN(n);
+
+	    if (!SPECIAL_CONST_P(recv)) { /* immediates have no builtin type */
+		int tmask = FIX2INT(cmask);
+		enum ruby_value_type btype = BUILTIN_TYPE(recv);
+		int rmask = 1 << btype;
+
+		if ((rmask & tmask) &&
+			(rb_opt_method_class(btype) == RBASIC_CLASS(recv))) {
+		    if (rb_opt_method_is_mask(om)) {
+			if (rb_basic_mask_unredefined_p(om)) {
+			    goto out;
+			}
+		    }
+		    else if (rb_basic_op_unredefined_p(om)) {
+			goto out;
+		    }
+		}
+	    }
+	    goto do_resurrect;
+	  }
+	  case RUBY_T_CLASS: { /* T_DATA oddities (Time#strftime) */
+	    int n = FIX2INT(ri[3]);
+	    VALUE recv = n < 0 ? val : TOPN(n);
+	    if (SPECIAL_CONST_P(recv)) goto do_resurrect; /* no RBasic to read */
+	    if (cmask == RBASIC_CLASS(recv) && rb_basic_op_unredefined_p(om)) {
+		goto out;
+	    }
+	    goto do_resurrect;
+	  }
+	  case RUBY_T_SYMBOL:
+	    /* receiver is the string literal itself: */
+	    if (UNLIKELY(!rb_basic_op_unredefined_p(om))) {
+		val = rb_str_resurrect(val);
+		val = rb_funcall(val, SYM2ID(cmask), 0);
+		/*
+		 * do not bother with: rb_undo_opt_str_lit(GET_CFP());
+		 * here, it is crazy to redefine core String methods :P
+		 */
+	    }
+	    goto out;
+	  default:
+	    rb_bug("bad type as cmask: %+"PRIsVALUE, cmask);
+	}
+	UNREACHABLE;
+    }
+    else { /* string lit is receiver, but there are args */
+	if (rb_basic_op_unredefined_p(om)) goto out;
+    }
+do_resurrect: /* optimization not applicable: allocate a fresh String */
+    val = rb_str_resurrect(val);
+    rb_undo_opt_str_lit(GET_CFP()); /* patch this insn back to putstring */
+out:
+}
+
+/**
+  @c put
   @e put concatenate strings
   @j スタックトップの文字列を n 個連結し,結果をスタックにプッシュする。
  */
@@ -999,20 +1085,6 @@ send
     CALL_METHOD(ci);
 }
 
-DEFINE_INSN
-opt_str_freeze
-(VALUE str)
-()
-(VALUE val)
-{
-    if (BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG)) {
-	val = str;
-    }
-    else {
-	val = rb_funcall(rb_str_resurrect(str), idFreeze, 0);
-    }
-}
-
 /**
   @c optimize
   @e Invoke method without block, splat
@@ -1285,11 +1357,7 @@ opt_case_dispatch
       case T_FIXNUM:
       case T_BIGNUM:
       case T_STRING:
-	if (BASIC_OP_UNREDEFINED_P(BOP_EQQ,
-				   SYMBOL_REDEFINED_OP_FLAG |
-				   FIXNUM_REDEFINED_OP_FLAG |
-				   BIGNUM_REDEFINED_OP_FLAG |
-				   STRING_REDEFINED_OP_FLAG)) {
+	if (rb_basic_mask_unredefined_p(OM_idEqq__Bignum_Fixnum_Float_Symbol)) {
 	    st_data_t val;
 	    if (st_lookup(RHASH_TBL_RAW(hash), key, &val)) {
 		JUMP(FIX2INT((VALUE)val));
@@ -1317,8 +1385,7 @@ opt_plus
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_PLUS,FIXNUM_REDEFINED_OP_FLAG)) {
+    if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idPLUS, Fixnum)) {
 	/* fixnum + fixnum */
 #ifndef LONG_LONG_VALUE
 	val = (recv + (obj & (~1)));
@@ -1341,20 +1408,20 @@ opt_plus
 #endif
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idPLUS, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
-	    BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idPLUS, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
 	}
 	else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString &&
-		 BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idPLUS, String)) {
 	    val = rb_str_plus(recv, obj);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idPLUS, Array)) {
 	    val = rb_ary_plus(recv, obj);
 	}
 	else {
@@ -1381,7 +1448,7 @@ opt_minus
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_MINUS, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idMINUS, Fixnum)) {
 	long a, b, c;
 
 	a = FIX2LONG(recv);
@@ -1396,12 +1463,12 @@ opt_minus
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idMINUS, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idMINUS, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1429,7 +1496,7 @@ opt_mult
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_MULT, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idMULT, Fixnum)) {
 	long a, b;
 
 	a = FIX2LONG(recv);
@@ -1446,13 +1513,12 @@ opt_mult
             }
 	}
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMULT, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idMULT, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1478,8 +1544,7 @@ opt_div
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_DIV, FIXNUM_REDEFINED_OP_FLAG)) {
+    if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idDIV, Fixnum)) {
 	long x, y, div;
 
 	x = FIX2LONG(recv);
@@ -1509,13 +1574,12 @@ opt_div
 	}
 	val = LONG2NUM(div);
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idDIV, Float)) {
 	val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idDIV, Float)) {
 	    val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1541,8 +1605,7 @@ opt_mod
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_MOD, FIXNUM_REDEFINED_OP_FLAG )) {
+    if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMOD, Fixnum )) {
 	long x, y;
 
 	x = FIX2LONG(recv);
@@ -1576,13 +1639,12 @@ opt_mod
 	    val = LONG2FIX(mod);
 	}
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMOD, Float)) {
 	val = DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)));
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
-	    BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idMOD, Float)) {
 	    val = DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)));
 	}
 	else {
@@ -1661,7 +1723,7 @@ opt_lt
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_LT, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idLT, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a < b) {
@@ -1672,13 +1734,13 @@ opt_lt
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idLT, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) < RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idLT, Float)) {
 	    val = double_cmp_lt(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1705,7 +1767,7 @@ opt_le
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_LE, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idLE, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a <= b) {
@@ -1716,7 +1778,7 @@ opt_le
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_LE, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idLE, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) <= RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
@@ -1740,7 +1802,7 @@ opt_gt
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_GT, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idGT, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a > b) {
@@ -1751,13 +1813,13 @@ opt_gt
 	}
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idGT, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) > RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat  &&
-	    BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idGT, Float)) {
 	    val = double_cmp_gt(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
 	}
 	else {
@@ -1784,7 +1846,7 @@ opt_ge
 (VALUE val)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_GE, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idGE, Fixnum)) {
 	SIGNED_VALUE a = recv, b = obj;
 
 	if (a >= b) {
@@ -1794,8 +1856,7 @@ opt_ge
 	    val = Qfalse;
 	}
     }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_GE, FLOAT_REDEFINED_OP_FLAG)) {
+    else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idGE, Float)) {
 	/* flonum is not NaN */
 	val = RFLOAT_VALUE(recv) >= RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
     }
@@ -1819,11 +1880,11 @@ opt_ltlt
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_LTLT, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idLTLT, String)) {
 	    val = rb_str_concat(recv, obj);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_LTLT, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idLTLT, Array)) {
 	    val = rb_ary_push(recv, obj);
 	}
 	else {
@@ -1850,10 +1911,10 @@ opt_aref
 (VALUE val)
 {
     if (!SPECIAL_CONST_P(recv)) {
-	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(BOP_AREF, ARRAY_REDEFINED_OP_FLAG) && FIXNUM_P(obj)) {
+	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(idAREF, Array) && FIXNUM_P(obj)) {
 	    val = rb_ary_entry(recv, FIX2LONG(obj));
 	}
-	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
+	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(idAREF, Hash)) {
 	    val = rb_hash_aref(recv, obj);
 	}
 	else {
@@ -1880,11 +1941,11 @@ opt_aset
 (VALUE val)
 {
     if (!SPECIAL_CONST_P(recv)) {
-	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(BOP_ASET, ARRAY_REDEFINED_OP_FLAG) && FIXNUM_P(obj)) {
+	if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(idASET, Array) && FIXNUM_P(obj)) {
 	    rb_ary_store(recv, FIX2LONG(obj), set);
 	    val = set;
 	}
-	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
+	else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(idASET, Hash)) {
 	    rb_hash_aset(recv, obj, set);
 	    val = set;
 	}
@@ -1903,49 +1964,6 @@ opt_aset
 
 /**
   @c optimize
-  @e recv[str] = set
-  @j 最適化された recv[str] = set。
- */
-DEFINE_INSN
-opt_aset_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv, VALUE val)
-(VALUE val)
-{
-    if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
-	rb_hash_aset(recv, key, val);
-    }
-    else {
-	PUSH(recv);
-	PUSH(rb_str_resurrect(key));
-	PUSH(val);
-	CALL_SIMPLE_METHOD(recv);
-    }
-}
-
-/**
-  @c optimize
-  @e recv[str]
-  @j 最適化された recv[str]。
- */
-DEFINE_INSN
-opt_aref_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv)
-(VALUE val)
-{
-    if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
-	val = rb_hash_aref(recv, key);
-    }
-    else {
-	PUSH(recv);
-	PUSH(rb_str_resurrect(key));
-	CALL_SIMPLE_METHOD(recv);
-    }
-}
-
-/**
-  @c optimize
   @e optimized length
   @j 最適化された recv.length()。
  */
@@ -1957,15 +1975,15 @@ opt_length
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_LENGTH, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idLength, String)) {
 	    val = rb_str_length(recv);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_LENGTH, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idLength, Array)) {
 	    val = LONG2NUM(RARRAY_LEN(recv));
 	}
 	else if (RBASIC_CLASS(recv) == rb_cHash &&
-		 BASIC_OP_UNREDEFINED_P(BOP_LENGTH, HASH_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idLength, Hash)) {
 	    val = INT2FIX(RHASH_SIZE(recv));
 	}
 	else {
@@ -1992,15 +2010,15 @@ opt_size
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_SIZE, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idSize, String)) {
 	    val = rb_str_length(recv);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_SIZE, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idSize, Array)) {
 	    val = LONG2NUM(RARRAY_LEN(recv));
 	}
 	else if (RBASIC_CLASS(recv) == rb_cHash &&
-		 BASIC_OP_UNREDEFINED_P(BOP_SIZE, HASH_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idSize, Hash)) {
 	    val = INT2FIX(RHASH_SIZE(recv));
 	}
 	else {
@@ -2027,17 +2045,17 @@ opt_empty_p
 {
     if (!SPECIAL_CONST_P(recv)) {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idEmptyP, String)) {
 	    if (RSTRING_LEN(recv) == 0) val = Qtrue;
 	    else val = Qfalse;
 	}
 	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, ARRAY_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idEmptyP, Array)) {
 	    if (RARRAY_LEN(recv) == 0) val = Qtrue;
 	    else val = Qfalse;
 	}
 	else if (RBASIC_CLASS(recv) == rb_cHash &&
-		 BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, HASH_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idEmptyP, Hash)) {
 	    if (RHASH_EMPTY_P(recv)) val = Qtrue;
 	    else val = Qfalse;
 	}
@@ -2065,7 +2083,7 @@ opt_succ
 {
     if (SPECIAL_CONST_P(recv)) {
 	if (FIXNUM_P(recv) &&
-	    BASIC_OP_UNREDEFINED_P(BOP_SUCC, FIXNUM_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idSucc, Fixnum)) {
 	    const VALUE obj = INT2FIX(1);
 	    /* fixnum + INT2FIX(1) */
 	    val = (recv + (obj & (~1)));
@@ -2080,11 +2098,11 @@ opt_succ
     }
     else {
 	if (RBASIC_CLASS(recv) == rb_cString &&
-	    BASIC_OP_UNREDEFINED_P(BOP_SUCC, STRING_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idSucc, String)) {
 	    val = rb_str_succ(recv);
 	}
 	else if (RBASIC_CLASS(recv) == rb_cTime &&
-		 BASIC_OP_UNREDEFINED_P(BOP_SUCC, TIME_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idSucc, Time)) {
 	    val = rb_time_succ(recv);
 	}
 	else
@@ -2134,7 +2152,7 @@ opt_regexpmatch1
 (VALUE obj)
 (VALUE val)
 {
-    if (BASIC_OP_UNREDEFINED_P(BOP_MATCH, REGEXP_REDEFINED_OP_FLAG)) {
+    if (BASIC_OP_UNREDEFINED_P(idEqTilde, Regexp)) {
 	val = rb_reg_match(r, obj);
     }
     else {
@@ -2154,7 +2172,7 @@ opt_regexpmatch2
 (VALUE val)
 {
     if (CLASS_OF(obj2) == rb_cString &&
-	BASIC_OP_UNREDEFINED_P(BOP_MATCH, STRING_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idEqTilde, String)) {
 	val = rb_reg_match(obj1, obj2);
     }
     else {
diff --git a/template/opt_method.h.tmpl b/template/opt_method.h.tmpl
new file mode 100644
index 0000000..ccff31d
--- /dev/null
+++ b/template/opt_method.h.tmpl
@@ -0,0 +1,111 @@
+/* DO NOT EDIT THIS FILE DIRECTLY: edit template/opt_method.h.tmpl instead */
+#ifndef RUBY_OPT_METHOD_H
+#define RUBY_OPT_METHOD_H
+<%
+defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def")
+eval(File.read(defs), binding, defs)
+tmasks = []
+%>
+typedef uint<%= OM_ALIGN %>_t rb_om_bitmap_t;
+
+enum ruby_optimized_method {
+<%
+opt_masks = {}
+mask_classes = {}
+n = 0
+OPT_METHODS.each do |(mid, *classes)|
+  classes.each do |klass|
+    if Array === klass
+      opt_masks[mid] = klass.dup
+      # we will align these in the second loop, below
+      klass.each { |k| mask_classes[k] = true }
+      next
+    end %>
+    <%= om(mid, klass) %> = <%= n += 1 %>,
+<%
+  end # classes.each
+end # OPT_METHODS.each
+
+# align multi-class bits so a single AND operation may
+# be byte-aligned and used to check an mid for up to 7 classes at once:
+opt_masks.each do |mid, classes|
+  # round up n to the next aligned byte slot
+  n = (n + OM_ALIGN) & OM_ALIGN_MASK
+
+  classes.each do |k|
+%>
+    <%= om(mid, k) %> = <%= n += 1 %>,
+<%=
+# we need this macro to generate shifts for the masks enums below:
+"#define #{om(mid, k)} (#{n})"
+%>
+<%
+  end # classes.each
+end # opt_masks.each
+if n >= ((1 << OM_ALIGN) - 1)
+  raise "OM_ALIGN needs to be raised to support more optimized methods"
+end
+%>
+    OM_LAST_ = <%= om_last = (n += 1) %>, /* for bitmap sizing */
+    /* special mask values below */
+<%
+# generate mask enums
+opt_masks.each do |mid, c|
+  # n.b.: negate masks to simplify the rb_opt_method_is_mask check:
+%>
+    <%= om(mid, c) %> = -(<%=
+      # pack into 16 bits so it may be a negative Fixnum
+      # 1) 8 bit offset
+      # 2) OM_ALIGN bits mask (8 or 16)
+      sep = "|\n    "
+      "/* offset: */ ((#{om(mid, c[0])} / #{OM_ALIGN}) << #{OM_ALIGN}) " \
+      "#{sep} /* mask: */ (" +
+      c.map { |k| "(1U << (#{om(mid, k)} % #{OM_ALIGN}))" }.join(sep) + # mask
+      ')'
+  %>),
+<%
+  # mask for type checking in insns.def, we name this like the OM_*
+  # enum so it is easy to get this name using CPP macros
+  tmasks << [
+    "OM_TMASK_#{c.join('_')}",
+    'INT2FIX(' +
+      c.map {|k| "(1U << RUBY_T_#{k.upcase})" }.join("|\\\n\t") +
+      ')'
+  ]
+end # opt_masks.each
+opt_classes.each_key do |k|
+  if IS_T_DATA[k]
+    tmasks << [ "OM_TMASK_#{k}", "rb_c#{k}" ]
+  else
+    tmasks << [ "OM_TMASK_#{k}", "INT2FIX(1U << RUBY_T_#{k.upcase})" ]
+  end
+end # opt_classes.each_key
+%>
+    OM_ALIGN_ = <%= OM_ALIGN %>,
+    OM_SIZE_ = <%= ((om_last + OM_ALIGN) & OM_ALIGN_MASK) / OM_ALIGN %>,
+    OM_GETMASK_ = (1 << OM_ALIGN_) - 1
+};
+
+/* macros */
+<% tmasks.each do |(k,v)| %>
+#define <%= k %> (<%= v %>)
+<% end %>
+
+/* map a raw type to the preferred (optimized) class */
+static inline VALUE
+rb_opt_method_class(enum ruby_value_type type)
+{
+    switch (type) {
+<%
+opt_classes.each_key do |k|
+  next if IS_T_DATA[k]
+%>
+      case RUBY_T_<%= k.upcase %>: return rb_c<%= k %>;
+<%
+end
+%>
+      default: return Qfalse;
+    }
+}
+
+#endif /* RUBY_OPT_METHOD_H */
diff --git a/template/opt_method.inc.tmpl b/template/opt_method.inc.tmpl
new file mode 100644
index 0000000..acbdc1a
--- /dev/null
+++ b/template/opt_method.inc.tmpl
@@ -0,0 +1,42 @@
+/* DO NOT EDIT THIS FILE DIRECTLY: edit template/opt_method.inc.tmpl instead */
+<%
+defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def")
+eval(File.read(defs), binding, defs)
+%>
+
+static void
+add_opt_method(st_table *tbl, VALUE klass, ID mid,
+		enum ruby_optimized_method om)
+{
+    rb_method_entry_t *me = rb_method_entry_at(klass, mid);
+
+    if (me && me->def && me->def->type == VM_METHOD_TYPE_CFUNC) {
+	st_insert(tbl, (st_data_t)me, (st_data_t)om);
+    }
+    else if (mid != idNeq) {
+	rb_bug("undefined optimized method: %s", rb_id2name(mid));
+    }
+}
+
+static void
+vm_init_redefined_flags(void *tbl)
+{
+<%
+OPT_METHODS.each do |(mid, *classes)|
+  classes.flatten.each do |klass|
+%>
+    add_opt_method(tbl, rb_c<%= klass %>, <%= mid %>, <%= om(mid, klass) %>);
+<%
+  end # classes.each
+end # OPT_METHODS.each
+%>
+}
+
+static int
+vm_redefinition_check_flag(VALUE klass)
+{
+<% opt_classes.each_key do |klass| %>
+    if (klass == rb_c<%= klass %>) return 1;
+<% end %>
+    return 0;
+}
diff --git a/test/-ext-/symbol/test_type.rb b/test/-ext-/symbol/test_type.rb
index f1749f5..5bd79b8 100644
--- a/test/-ext-/symbol/test_type.rb
+++ b/test/-ext-/symbol/test_type.rb
@@ -4,6 +4,7 @@ require "-test-/symbol"
 module Test_Symbol
   class TestType < Test::Unit::TestCase
     def test_id2str_fstring_bug9171
+      require_compile_option(:peephole_optimization)
       fstr = eval("# encoding: us-ascii
         'foobar'.freeze")
       assert_same fstr, Bug::Symbol.id2str(:foobar)
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index 8a5ed34..faacf48 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -195,6 +195,7 @@ class TestObjSpace < Test::Unit::TestCase
   end
 
   def test_dump_flags
+    require_compile_option(:peephole_optimization)
     info = ObjectSpace.dump("foo".freeze)
     assert_match /"wb_protected":true, "old":true, "long_lived":true, "marked":true/, info
     assert_match /"fstring":true/, info
diff --git a/test/ruby/envutil.rb b/test/ruby/envutil.rb
index f5fbb7c..bddaf82 100644
--- a/test/ruby/envutil.rb
+++ b/test/ruby/envutil.rb
@@ -520,6 +520,16 @@ eom
         end
       end
 
+      def require_compile_option(opt)
+        case RubyVM::InstructionSequence.compile_option[opt]
+        when true
+        when false
+          skip(":#{opt} disabled")
+        else
+          raise ArgumentError, "unrecognized compile option: #{opt.inspect}"
+        end
+      end
+
       class << (AssertFile = Struct.new(:failure_message).new)
         include Assertions
         def assert_file_predicate(predicate, *args)
diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index 4431552..bb7e8b5 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -216,6 +216,7 @@ class TestHash < Test::Unit::TestCase
   end
 
   def test_AREF_fstring_key
+    require_compile_option(:peephole_optimization)
     h = {"abc" => 1}
     before = GC.stat(:total_allocated_objects)
     5.times{ h["abc"] }
@@ -230,6 +231,7 @@ class TestHash < Test::Unit::TestCase
   end
 
   def test_NEWHASH_fstring_key
+    require_compile_option(:peephole_optimization)
     a = {"ABC" => :t}
     b = {"ABC" => :t}
     assert_same a.keys[0], b.keys[0]
diff --git a/test/ruby/test_iseq.rb b/test/ruby/test_iseq.rb
index 94a814c..ac1c417 100644
--- a/test/ruby/test_iseq.rb
+++ b/test/ruby/test_iseq.rb
@@ -118,6 +118,7 @@ class TestISeq < Test::Unit::TestCase
   end
 
   def test_label_fstring
+    require_compile_option(:peephole_optimization)
     c = Class.new{ def foobar() end }
 
     a, b = eval("# encoding: us-ascii\n'foobar'.freeze"),
diff --git a/test/ruby/test_optimization.rb b/test/ruby/test_optimization.rb
index 129f62a..49444bc 100644
--- a/test/ruby/test_optimization.rb
+++ b/test/ruby/test_optimization.rb
@@ -140,6 +140,18 @@ class TestRubyOptimization < Test::Unit::TestCase
     assert_redefine_method('String', '<<', 'assert_equal "b", "a" << "b"')
   end
 
+  def test_string_delete
+    assert_equal "foo", "foobar".delete("bar")
+    assert_redefine_method('String', 'delete',
+                           'assert_equal "b", "a".delete("b")')
+  end
+
+  def test_string_include?
+    assert_equal true, "foobar".include?("bar")
+    assert_redefine_method('String', 'include?',
+                           'assert_equal "b", "a".include?("b")')
+  end
+
   def test_array_plus
     assert_equal [1,2], [1]+[2]
     assert_redefine_method('Array', '+', 'assert_equal [2], [1]+[2]')
@@ -162,6 +174,15 @@ class TestRubyOptimization < Test::Unit::TestCase
     assert_redefine_method('Array', 'empty?', 'assert_nil([].empty?); assert_nil([1,2,3].empty?)')
   end
 
+  def test_array_delete
+    assert_equal "c", %w(a b c).delete("c")
+    assert_redefine_method('Array', 'delete', <<-end)
+      x = []
+      assert_equal x.object_id, [].delete(x).object_id
+      assert_equal x.object_id, %w(a b c).delete(x).object_id
+    end
+  end
+
   def test_hash_length
     assert_equal 0, {}.length
     assert_equal 1, {1=>1}.length
@@ -193,6 +214,14 @@ class TestRubyOptimization < Test::Unit::TestCase
     end
   end
 
+  def test_hash_delete
+    assert_equal(1, { "c" => 1 }.delete("c"))
+    assert_redefine_method('Hash', 'delete', <<-end)
+      assert_equal "c", {}.delete("c")
+      assert_equal "c", {"c" => 1}.delete("c")
+    end
+  end
+
   class MyObj
     def ==(other)
       true
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 543c138..d98355e 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1910,6 +1910,42 @@ class TestString < Test::Unit::TestCase
     }
   end
 
+  def test_literal_freeze
+    require_compile_option(:peephole_optimization)
+    assert_no_new_allocations do
+      5.times { "".freeze }
+    end
+  end
+
+  def test_literal_delete_args
+    require_compile_option(:peephole_optimization)
+    return unless @cls == String
+    nr = 5
+    objs = [ {}, [] ]
+    assert_no_new_allocations do
+      nr.times { objs.each { |obj| obj.delete("foo") } }
+    end
+    objs = [ {"foo" => 1}, %w(foo) ]
+    assert_no_new_allocations do
+      nr.times { objs.each { |obj| obj.delete("foo") } }
+    end
+
+    str = "foo"
+    assert_no_new_allocations("String#delete", nr * 2) do
+      nr.times { str.delete('O') }
+      nr.times { str.delete('o') }
+    end
+  end
+
+  def test_literal_include_p_args
+    require_compile_option(:peephole_optimization)
+    return unless @cls == String
+    objs = [ {"foo" => 1}, %w(foo), "foo", "", {}, [] ]
+    assert_no_new_allocations do
+      5.times { objs.each { |obj| obj.include?("foo") } }
+    end
+  end
+
   class S2 < String
   end
   def test_str_new4
@@ -2274,6 +2310,129 @@ class TestString < Test::Unit::TestCase
     end;
   end if [0].pack("l!").bytesize < [nil].pack("p").bytesize
   # enable only when string size range is smaller than memory space
+
+  def test_opt_strcat_with
+    if @cls == String
+      nr = 10
+      recv = ""
+      assert_no_new_allocations do
+        nr.times { recv << "constant" }
+      end
+      assert_equal "constant" * nr, recv
+
+      assert_no_new_allocations("'lit' << 'lit' (LTLT)", nr) do
+        nr.times { "recv" << "constant" }
+      end
+    end
+  end
+
+  def test_opt_str_lit
+    assert_separately([], <<-RUBY)
+      class String
+        undef *
+        def *(other)
+          :mult
+        end
+      end
+      assert_equal(:mult, "x" * 3, 'lit mult')
+    RUBY
+
+    assert_separately([], <<-RUBY)
+      class String
+        undef ===
+        def ===(other)
+          other
+        end
+      end
+      str = "y"
+      assert_equal(false, "x" === false, 'lit threequal')
+      assert_equal("x", str === "x", 'threequal lit')
+    RUBY
+
+    if @cls == String
+      nr = 10
+
+      recv = "something"
+      res = []
+      assert_no_new_allocations("false comparisons") do
+        nr.times { res << (recv == "constant") } # opt_streq1
+        nr.times { res << ("constant" == recv) } # opt_streq2
+        nr.times { res << ("something" != recv) } # 1st pass peephole
+        nr.times { res << ("constant" == recv) } # opt_streq2
+        nr.times { res << ("constant" === recv) } # opt_streqq2
+        nr.times { res << (recv != "something") }  # 2nd pass peephole
+      end
+      assert_equal [ false ], res.uniq!
+
+      res.clear
+      assert_no_new_allocations("true comparisons") do
+        nr.times { res << (recv == "something") } # opt_streq1
+        nr.times { res << ("something" == recv) } # opt_streq2
+        nr.times { res << ("something" === recv) } # opt_streqq2
+        nr.times { res << (recv === "something") } # opt_streqq1
+        nr.times { res << ("constant" != recv) } # 1st pass peephole
+        nr.times { res << (recv != "constant") } # 2nd pass peephole
+        nr.times { res << ("a" != "b") } # 1st pass peephole
+        nr.times { res << ("a" == "a") } # 1st pass peephole
+        nr.times { res << ("".size == 0) } # 2nd pass peephole
+        nr.times { res << ("".length == 0) } # 2nd pass peephole
+      end
+      assert_equal [ true ], res.uniq!
+
+      # :+ optimizations
+      res.clear
+      assert_no_new_allocations("'str' + (PLUS)", nr) do
+        nr.times { res << ("foo" + recv) }
+      end
+      assert_equal [ "foosomething" ], res.uniq!
+
+      res.clear
+      assert_no_new_allocations("+ 'str' (PLUS)", nr) do
+        nr.times { res << (recv + "foo") }
+      end
+      assert_equal [ "somethingfoo" ], res.uniq!
+
+      res.clear
+      assert_no_new_allocations("'str' * (MULT)", nr) do
+        nr.times { res << ('a' * 3) }
+      end
+      assert_equal [ "aaa" ], res.uniq!
+    end
+  end
+
+  def assert_no_new_allocations(mesg = "", adjust = 0)
+    before = GC.stat(:total_allocated_objects)
+    yield
+    after = GC.stat(:total_allocated_objects)
+    assert_equal before, after - adjust, mesg
+  end
+
+  def test_opt_str_lit_gsub
+    return if @cls != String
+    require_compile_option(:peephole_optimization)
+    foo = "foo"
+    re = /nomatch/
+    foo.gsub!(re, "00") # compile regexp
+    n = 3
+
+    assert_no_new_allocations("gsub var regexp") do
+      n.times { foo.gsub!(re, "00") }
+    end
+
+    # compiles re once:
+    assert_no_new_allocations("gsub lit regexp", 1) do
+      n.times { foo.gsub!(/nomatch/, "00") }
+    end
+
+    assert_no_new_allocations("gsub literal string") do
+      n.times { foo.gsub!("nomatch", "00") }
+    end
+
+    ary = [ [ re ] ]
+    assert_no_new_allocations("bigger stack") do
+      n.times { foo.gsub!(ary[0][0], "00") }
+    end
+  end
 end
 
 class TestString2 < TestString
diff --git a/vm.c b/vm.c
index cc88926..35c4120 100644
--- a/vm.c
+++ b/vm.c
@@ -20,6 +20,7 @@
 #include "eval_intern.h"
 #include "probes.h"
 #include "probes_helper.h"
+#include "opt_method.inc"
 
 static inline VALUE *
 VM_EP_LEP(VALUE *ep)
@@ -1134,30 +1135,16 @@ rb_iter_break_value(VALUE val)
 
 static st_table *vm_opt_method_table = 0;
 
-static int
-vm_redefinition_check_flag(VALUE klass)
-{
-    if (klass == rb_cFixnum) return FIXNUM_REDEFINED_OP_FLAG;
-    if (klass == rb_cFloat)  return FLOAT_REDEFINED_OP_FLAG;
-    if (klass == rb_cString) return STRING_REDEFINED_OP_FLAG;
-    if (klass == rb_cArray)  return ARRAY_REDEFINED_OP_FLAG;
-    if (klass == rb_cHash)   return HASH_REDEFINED_OP_FLAG;
-    if (klass == rb_cBignum) return BIGNUM_REDEFINED_OP_FLAG;
-    if (klass == rb_cSymbol) return SYMBOL_REDEFINED_OP_FLAG;
-    if (klass == rb_cTime)   return TIME_REDEFINED_OP_FLAG;
-    if (klass == rb_cRegexp) return REGEXP_REDEFINED_OP_FLAG;
-    return 0;
-}
-
 static void
 rb_vm_check_redefinition_opt_method(const rb_method_entry_t *me, VALUE klass)
 {
-    st_data_t bop;
+    st_data_t om;
     if (!me->def || me->def->type == VM_METHOD_TYPE_CFUNC) {
-	if (st_lookup(vm_opt_method_table, (st_data_t)me, &bop)) {
-	    int flag = vm_redefinition_check_flag(klass);
+	if (st_lookup(vm_opt_method_table, (st_data_t)me, &om)) {
+	    unsigned int i = om / OM_ALIGN_;
+	    rb_om_bitmap_t mask = (rb_om_bitmap_t)(1U << (om % OM_ALIGN_));
 
-	    ruby_vm_redefined_flag[bop] |= flag;
+	    ruby_vm_redefined_flag[i] |= mask;
 	}
     }
 }
@@ -1184,51 +1171,11 @@ rb_vm_check_redefinition_by_prepend(VALUE klass)
 }
 
 static void
-add_opt_method(VALUE klass, ID mid, VALUE bop)
-{
-    rb_method_entry_t *me = rb_method_entry_at(klass, mid);
-
-    if (me && me->def &&
-	me->def->type == VM_METHOD_TYPE_CFUNC) {
-	st_insert(vm_opt_method_table, (st_data_t)me, (st_data_t)bop);
-    }
-    else {
-	rb_bug("undefined optimized method: %s", rb_id2name(mid));
-    }
-}
-
-static void
 vm_init_redefined_flag(void)
 {
-    ID mid;
-    VALUE bop;
-
     vm_opt_method_table = st_init_numtable();
 
-#define OP(mid_, bop_) (mid = id##mid_, bop = BOP_##bop_, ruby_vm_redefined_flag[bop] = 0)
-#define C(k) add_opt_method(rb_c##k, mid, bop)
-    OP(PLUS, PLUS), (C(Fixnum), C(Float), C(String), C(Array));
-    OP(MINUS, MINUS), (C(Fixnum), C(Float));
-    OP(MULT, MULT), (C(Fixnum), C(Float));
-    OP(DIV, DIV), (C(Fixnum), C(Float));
-    OP(MOD, MOD), (C(Fixnum), C(Float));
-    OP(Eq, EQ), (C(Fixnum), C(Float), C(String));
-    OP(Eqq, EQQ), (C(Fixnum), C(Bignum), C(Float), C(Symbol), C(String));
-    OP(LT, LT), (C(Fixnum), C(Float));
-    OP(LE, LE), (C(Fixnum), C(Float));
-    OP(GT, GT), (C(Fixnum), C(Float));
-    OP(GE, GE), (C(Fixnum), C(Float));
-    OP(LTLT, LTLT), (C(String), C(Array));
-    OP(AREF, AREF), (C(Array), C(Hash));
-    OP(ASET, ASET), (C(Array), C(Hash));
-    OP(Length, LENGTH), (C(Array), C(String), C(Hash));
-    OP(Size, SIZE), (C(Array), C(String), C(Hash));
-    OP(EmptyP, EMPTY_P), (C(Array), C(String), C(Hash));
-    OP(Succ, SUCC), (C(Fixnum), C(String), C(Time));
-    OP(EqTilde, MATCH), (C(Regexp), C(String));
-    OP(Freeze, FREEZE), (C(String));
-#undef C
-#undef OP
+    vm_init_redefined_flags(vm_opt_method_table); /* opt_method.inc.tmpl */
 }
 
 /* for vm development */
diff --git a/vm_core.h b/vm_core.h
index 9f0f053..c0f7454 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -24,6 +24,7 @@
 #include "method.h"
 #include "ruby_atomic.h"
 #include "ccan/list/list.h"
+#include "opt_method.h"
 
 #include "ruby/thread_native.h"
 #if   defined(_WIN32)
@@ -320,33 +321,6 @@ enum ruby_special_exceptions {
     ruby_special_error_count
 };
 
-enum ruby_basic_operators {
-    BOP_PLUS,
-    BOP_MINUS,
-    BOP_MULT,
-    BOP_DIV,
-    BOP_MOD,
-    BOP_EQ,
-    BOP_EQQ,
-    BOP_LT,
-    BOP_LE,
-    BOP_LTLT,
-    BOP_AREF,
-    BOP_ASET,
-    BOP_LENGTH,
-    BOP_SIZE,
-    BOP_EMPTY_P,
-    BOP_SUCC,
-    BOP_GT,
-    BOP_GE,
-    BOP_NOT,
-    BOP_NEQ,
-    BOP_MATCH,
-    BOP_FREEZE,
-
-    BOP_LAST_
-};
-
 #define GetVMPtr(obj, ptr) \
   GetCoreDataFromValue((obj), rb_vm_t, (ptr))
 
@@ -441,7 +415,7 @@ typedef struct rb_vm_struct {
 	size_t fiber_machine_stack_size;
     } default_params;
 
-    short redefined_flag[BOP_LAST_];
+    rb_om_bitmap_t redefined_flag[OM_SIZE_];
 } rb_vm_t;
 
 /* default values */
@@ -458,18 +432,8 @@ typedef struct rb_vm_struct {
 #define RUBY_VM_FIBER_MACHINE_STACK_SIZE      (  64 * 1024 * sizeof(VALUE)) /*  256 KB or  512 KB */
 #define RUBY_VM_FIBER_MACHINE_STACK_SIZE_MIN  (  16 * 1024 * sizeof(VALUE)) /*   64 KB or  128 KB */
 
-/* optimize insn */
-#define FIXNUM_REDEFINED_OP_FLAG (1 << 0)
-#define FLOAT_REDEFINED_OP_FLAG  (1 << 1)
-#define STRING_REDEFINED_OP_FLAG (1 << 2)
-#define ARRAY_REDEFINED_OP_FLAG  (1 << 3)
-#define HASH_REDEFINED_OP_FLAG   (1 << 4)
-#define BIGNUM_REDEFINED_OP_FLAG (1 << 5)
-#define SYMBOL_REDEFINED_OP_FLAG (1 << 6)
-#define TIME_REDEFINED_OP_FLAG   (1 << 7)
-#define REGEXP_REDEFINED_OP_FLAG (1 << 8)
-
-#define BASIC_OP_UNREDEFINED_P(op, klass) (LIKELY((GET_VM()->redefined_flag[(op)]&(klass)) == 0))
+#define BASIC_OP_UNREDEFINED_P(mid, klass) \
+	rb_basic_op_unredefined_p(OM_##mid##__##klass)
 
 #ifndef VM_DEBUG_BP_CHECK
 #define VM_DEBUG_BP_CHECK 0
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 05ed3c6..2aedb46 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -872,17 +872,17 @@ VALUE
 opt_eq_func(VALUE recv, VALUE obj, CALL_INFO ci)
 {
     if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_EQ, FIXNUM_REDEFINED_OP_FLAG)) {
+	BASIC_OP_UNREDEFINED_P(idEq, Fixnum)) {
 	return (recv == obj) ? Qtrue : Qfalse;
     }
     else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_EQ, FLOAT_REDEFINED_OP_FLAG)) {
+	     BASIC_OP_UNREDEFINED_P(idEq, Float)) {
 	return (recv == obj) ? Qtrue : Qfalse;
     }
     else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
 	if (RBASIC_CLASS(recv) == rb_cFloat &&
 	    RBASIC_CLASS(obj) == rb_cFloat &&
-	    BASIC_OP_UNREDEFINED_P(BOP_EQ, FLOAT_REDEFINED_OP_FLAG)) {
+	    BASIC_OP_UNREDEFINED_P(idEq, Float)) {
 	    double a = RFLOAT_VALUE(recv);
 	    double b = RFLOAT_VALUE(obj);
 
@@ -893,7 +893,7 @@ opt_eq_func(VALUE recv, VALUE obj, CALL_INFO ci)
 	}
 	else if (RBASIC_CLASS(recv) == rb_cString &&
 		 RBASIC_CLASS(obj) == rb_cString &&
-		 BASIC_OP_UNREDEFINED_P(BOP_EQ, STRING_REDEFINED_OP_FLAG)) {
+		 BASIC_OP_UNREDEFINED_P(idEq, String)) {
 	    return rb_str_equal(recv, obj);
 	}
     }
diff --git a/vm_insnhelper.h b/vm_insnhelper.h
index 31f8ffc..51dd658 100644
--- a/vm_insnhelper.h
+++ b/vm_insnhelper.h
@@ -229,5 +229,32 @@ enum vm_regan_acttype {
 static VALUE make_no_method_exception(VALUE exc, const char *format,
 				      VALUE obj, int argc, const VALUE *argv);
 
+static inline int
+rb_basic_op_unredefined_p(enum ruby_optimized_method om)
+{
+    unsigned int i = om / OM_ALIGN_;
+    rb_om_bitmap_t mask = (rb_om_bitmap_t)(1U << (om % OM_ALIGN_));
+
+    return LIKELY((GET_VM()->redefined_flag[i] & mask) == 0);
+}
+
+static inline int
+rb_basic_mask_unredefined_p(enum ruby_optimized_method om)
+{
+    unsigned int uom = (unsigned int)-om;
+    unsigned int offset = 0xffU & (uom >> OM_ALIGN_);
+    rb_om_bitmap_t mask = (rb_om_bitmap_t)(OM_GETMASK_ & uom);
+
+    return LIKELY((GET_VM()->redefined_flag[offset] & mask) == 0);
+}
+
+static inline int
+rb_opt_method_is_mask(enum ruby_optimized_method om)
+{
+    return !!((int)om < 0);
+}
+
+/* compile.c */
+void rb_undo_opt_str_lit(rb_control_frame_t *cfp);
 
 #endif /* RUBY_INSNHELPER_H */

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2014-10-18  2:51 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-18  2:51 [RFC] opt_str_lit: optimize string literals in many cases Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).