dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH 1/2] compile.c: move "literal" optimizations to peephole optimize
@ 2014-10-09  1:50 Eric Wong
  2014-10-09  1:51 ` [PATCH 2/2] add generic and flexible putstring_for insn Eric Wong
  0 siblings, 1 reply; 3+ messages in thread
From: Eric Wong @ 2014-10-09  1:50 UTC (permalink / raw)
  To: spew

`"literal".freeze', `obj["literal"]', and `obj["literal"] = val'
are all peephole optimizations and not appropriate for
iseq_compile_each.
---
 compile.c                      | 66 ++++++++++++++++++++++--------------------
 test/-ext-/symbol/test_type.rb |  1 +
 test/objspace/test_objspace.rb |  1 +
 test/ruby/envutil.rb           | 10 +++++++
 test/ruby/test_hash.rb         |  2 ++
 test/ruby/test_iseq.rb         |  1 +
 test/ruby/test_string.rb       |  7 +++++
 7 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/compile.c b/compile.c
index dda22b0..b38eedb 100644
--- a/compile.c
+++ b/compile.c
@@ -1819,6 +1819,41 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 	    }
 	}
     }
+
+    /* string literal optimizations */
+    if (iobj->insn_id == BIN(putstring)) {
+	INSN *niobj = (INSN *)get_next_insn((INSN *)list);
+
+	if (niobj && niobj->insn_id == BIN(send)) {
+	    rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
+
+	    if (ci->blockiseq == 0 &&
+		(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP) == 0) {
+
+		/* "literal".freeze -> opt_str_freeze("literal") */
+		if (ci->mid == idFreeze && ci->orig_argc == 0) {
+		    iobj->insn_id = BIN(opt_str_freeze);
+		    REMOVE_ELEM((LINK_ELEMENT *)niobj);
+		}
+
+		/* obj["literal"] -> opt_aref_with(obj, "literal") */
+		else if (ci->mid == idAREF && ci->orig_argc == 1) {
+		    VALUE *old_operands = iobj->operands;
+
+		    iobj->insn_id = BIN(opt_aref_with);
+		    iobj->operand_size = insn_len(iobj->insn_id) - 1;
+
+		    iobj->operands = (VALUE *)compile_data_alloc(iseq,
+					iobj->operand_size * sizeof(VALUE));
+		    iobj->operands[0] = (VALUE)ci;
+		    iobj->operands[1] = old_operands[0];
+
+		    REMOVE_ELEM((LINK_ELEMENT *)niobj);
+		}
+	    }
+	}
+    }
+
     return COMPILE_OK;
 }
 
@@ -4238,37 +4273,6 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	break;
       }
       case NODE_CALL:
-	/* optimization shortcut
-	 *   "literal".freeze -> opt_str_freeze("literal")
-	 */
-	if (node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
-	    node->nd_mid == idFreeze && node->nd_args == NULL)
-	{
-	    VALUE str = rb_fstring(node->nd_recv->nd_lit);
-	    iseq_add_mark_object(iseq, str);
-	    ADD_INSN1(ret, line, opt_str_freeze, str);
-	    if (poped) {
-		ADD_INSN(ret, line, pop);
-	    }
-	    break;
-	}
-	/* optimization shortcut
-	 *   obj["literal"] -> opt_aref_with(obj, "literal")
-	 */
-	if (node->nd_mid == idAREF && !private_recv_p(node) && node->nd_args &&
-	    nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 1 &&
-	    nd_type(node->nd_args->nd_head) == NODE_STR)
-	{
-	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
-	    node->nd_args->nd_head->nd_lit = str;
-	    COMPILE(ret, "recv", node->nd_recv);
-	    ADD_INSN2(ret, line, opt_aref_with,
-		      new_callinfo(iseq, idAREF, 1, 0, 0), str);
-	    if (poped) {
-		ADD_INSN(ret, line, pop);
-	    }
-	    break;
-	}
       case NODE_FCALL:
       case NODE_VCALL:{		/* VCALL: variable or call */
 	/*
diff --git a/test/-ext-/symbol/test_type.rb b/test/-ext-/symbol/test_type.rb
index f1749f5..5bd79b8 100644
--- a/test/-ext-/symbol/test_type.rb
+++ b/test/-ext-/symbol/test_type.rb
@@ -4,6 +4,7 @@ require "-test-/symbol"
 module Test_Symbol
   class TestType < Test::Unit::TestCase
     def test_id2str_fstring_bug9171
+      require_compile_option(:peephole_optimization)
       fstr = eval("# encoding: us-ascii
         'foobar'.freeze")
       assert_same fstr, Bug::Symbol.id2str(:foobar)
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index 8a5ed34..faacf48 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -195,6 +195,7 @@ class TestObjSpace < Test::Unit::TestCase
   end
 
   def test_dump_flags
+    require_compile_option(:peephole_optimization)
     info = ObjectSpace.dump("foo".freeze)
     assert_match /"wb_protected":true, "old":true, "long_lived":true, "marked":true/, info
     assert_match /"fstring":true/, info
diff --git a/test/ruby/envutil.rb b/test/ruby/envutil.rb
index 81b982c..e844822 100644
--- a/test/ruby/envutil.rb
+++ b/test/ruby/envutil.rb
@@ -477,6 +477,16 @@ eom
         AssertFile
       end
 
+      def require_compile_option(opt)
+        case RubyVM::InstructionSequence.compile_option[opt]
+        when true
+        when false
+          skip(":#{opt} disabled")
+        else
+          raise ArgumentError, "unrecognized compile option: #{opt.inspect}"
+        end
+      end
+
       class << (AssertFile = Struct.new(:failure_message).new)
         include Assertions
         def assert_file_predicate(predicate, *args)
diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index 4431552..bb7e8b5 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -216,6 +216,7 @@ class TestHash < Test::Unit::TestCase
   end
 
   def test_AREF_fstring_key
+    require_compile_option(:peephole_optimization)
     h = {"abc" => 1}
     before = GC.stat(:total_allocated_objects)
     5.times{ h["abc"] }
@@ -230,6 +231,7 @@ class TestHash < Test::Unit::TestCase
   end
 
   def test_NEWHASH_fstring_key
+    require_compile_option(:peephole_optimization)
     a = {"ABC" => :t}
     b = {"ABC" => :t}
     assert_same a.keys[0], b.keys[0]
diff --git a/test/ruby/test_iseq.rb b/test/ruby/test_iseq.rb
index 94a814c..ac1c417 100644
--- a/test/ruby/test_iseq.rb
+++ b/test/ruby/test_iseq.rb
@@ -118,6 +118,7 @@ class TestISeq < Test::Unit::TestCase
   end
 
   def test_label_fstring
+    require_compile_option(:peephole_optimization)
     c = Class.new{ def foobar() end }
 
     a, b = eval("# encoding: us-ascii\n'foobar'.freeze"),
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index d82d2bc..4dc790f 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1908,6 +1908,13 @@ class TestString < Test::Unit::TestCase
     }
   end
 
+  def test_literal_freeze
+    require_compile_option(:peephole_optimization)
+    before = GC.stat(:total_allocated_objects)
+    5.times { "".freeze }
+    assert_equal before, GC.stat(:total_allocated_objects)
+  end
+
   class S2 < String
   end
   def test_str_new4
-- 
EW


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] add generic and flexible putstring_for insn
  2014-10-09  1:50 [PATCH 1/2] compile.c: move "literal" optimizations to peephole optimize Eric Wong
@ 2014-10-09  1:51 ` Eric Wong
  2014-10-09  4:03   ` [PATCH] compile.c: optimize << and == using putstring_for Eric Wong
  0 siblings, 1 reply; 3+ messages in thread
From: Eric Wong @ 2014-10-09  1:51 UTC (permalink / raw)
  To: spew

A new putstring_for instruction may replace all current uses of:

* opt_str_freeze
* opt_aref_with
* opt_aset_with

This new instruction should also be usable to implement new
optimizations to avoid rb_str_resurrect.

Optimizations for literal hash["literal"] (aref/lookup) and
"literal".freeze are easily moved to the peephole optimizer.

However, it seems easier to optimize `hash["literal"] = val'
in iseq_compile_each right now.

This slightly reduces performance compared to the old opt_aref_with
and opt_aset_with instructions, but is more elegant in
avoiding special cases.  We may decide to resurrect opt_aref_with
and opt_aset_with if we want to recover the small performance loss
and can accept a bigger VM loop.

"".freeze performance is probably not interesting to anyone :)

benchmark results:
minimum results in each 5 measurements.
Execution time (sec)
name                    2.1.3   trunk   built
loop_whileloop2         0.106   0.106   0.106
vm2_hash_aref_lit*      0.503   0.162   0.192
vm2_hash_aset_lit*      0.587   0.214   0.241

Speedup ratio: compare with the result of `2.1.3' (greater is better)
name                    trunk   built
loop_whileloop2         1.000   0.998
vm2_hash_aref_lit*      3.099   2.621
vm2_hash_aset_lit*      2.741   2.435

raw data:

[["loop_whileloop2",
  [[0.10656525194644928,
    0.10594194941222668,
    0.10586611740291119,
    0.1066869841888547,
    0.10577277280390263],
   [0.1066260114312172,
    0.11153125017881393,
    0.1057466259226203,
    0.10648809000849724,
    0.10654668044298887],
   [0.10622590780258179,
    0.10608386714011431,
    0.105999612249434,
    0.10603114310652018,
    0.10603212099522352]]],
 ["vm2_hash_aref_lit",
  [[0.6088160118088126,
    0.6084766369313002,
    0.6114963851869106,
    0.6098374016582966,
    0.6085139447823167],
   [0.27083833049982786,
    0.2680424079298973,
    0.27930730395019054,
    0.26884936541318893,
    0.26798537466675043],
   [0.303865535184741,
    0.31264861673116684,
    0.2977857915684581,
    0.29944207333028316,
    0.30319013725966215]]],
 ["vm2_hash_aset_lit",
  [[0.6943842126056552,
    0.6933871945366263,
    0.694433419033885,
    0.6946460604667664,
    0.6931405374780297],
   [0.32219766546040773,
    0.3211979949846864,
    0.32000005338341,
    0.3278619237244129,
    0.3314230963587761],
   [0.3476126240566373,
    0.3471973007544875,
    0.3666635127738118,
    0.3474232777953148,
    0.3474671710282564]]]]
---
 benchmark/bm_vm2_hash_aref_lit.rb |   6 +++
 benchmark/bm_vm2_hash_aset_lit.rb |   6 +++
 compile.c                         |  71 ++++++++++++++++++--------
 insns.def                         | 102 +++++++++++++++++---------------------
 4 files changed, 106 insertions(+), 79 deletions(-)
 create mode 100644 benchmark/bm_vm2_hash_aref_lit.rb
 create mode 100644 benchmark/bm_vm2_hash_aset_lit.rb

diff --git a/benchmark/bm_vm2_hash_aref_lit.rb b/benchmark/bm_vm2_hash_aref_lit.rb
new file mode 100644
index 0000000..a6d4d12
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aref_lit.rb
@@ -0,0 +1,6 @@
+h = { "foo" => nil }
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h["foo"]
+end
diff --git a/benchmark/bm_vm2_hash_aset_lit.rb b/benchmark/bm_vm2_hash_aset_lit.rb
new file mode 100644
index 0000000..58339ec
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aset_lit.rb
@@ -0,0 +1,6 @@
+h = {}
+i = 0
+while i<6_000_000 # while loop 2
+  i += 1
+  h["foo"] = nil
+end
diff --git a/compile.c b/compile.c
index b38eedb..10db05d 100644
--- a/compile.c
+++ b/compile.c
@@ -1703,6 +1703,32 @@ get_prev_insn(INSN *iobj)
     return 0;
 }
 
+static void
+swap_putstring_for_recv(rb_iseq_t *iseq, INSN *iobj,
+			enum ruby_basic_operators bop, int redef_flag, ID mid)
+{
+    VALUE recv_info = rb_ary_new_from_args(4,
+	    iobj->operands[0], INT2FIX(bop), INT2FIX(redef_flag), ID2SYM(mid));
+    OBJ_FREEZE(recv_info);
+    iobj->insn_id = BIN(putstring_for);
+    iobj->operands[0] = recv_info;
+    iseq_add_mark_object(iseq, recv_info); /* XXX check if needed */
+}
+
+static void
+swap_putstring_for_arg(rb_iseq_t *iseq, INSN *iobj,
+		enum ruby_basic_operators bop, int redef_flag,
+		VALUE klass, int recv_off)
+{
+    VALUE recv_info = rb_ary_new_from_args(5,
+	    iobj->operands[0], INT2FIX(bop), INT2FIX(redef_flag),
+	    klass, INT2FIX(recv_off));
+    OBJ_FREEZE(recv_info);
+    iobj->insn_id = BIN(putstring_for);
+    iobj->operands[0] = recv_info;
+    iseq_add_mark_object(iseq, recv_info); /* XXX check if needed */
+}
+
 static int
 iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcallopt)
 {
@@ -1827,28 +1853,19 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 	if (niobj && niobj->insn_id == BIN(send)) {
 	    rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
 
-	    if (ci->blockiseq == 0 &&
-		(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP) == 0) {
+	    if (ci->blockiseq == 0 && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) {
 
-		/* "literal".freeze -> opt_str_freeze("literal") */
+		/* "literal".freeze -> putstring_for("literal", :freeze, ...) */
 		if (ci->mid == idFreeze && ci->orig_argc == 0) {
-		    iobj->insn_id = BIN(opt_str_freeze);
+		    swap_putstring_for_recv(iseq, iobj, BOP_FREEZE,
+					    STRING_REDEFINED_OP_FLAG, ci->mid);
 		    REMOVE_ELEM((LINK_ELEMENT *)niobj);
 		}
 
-		/* obj["literal"] -> opt_aref_with(obj, "literal") */
+		/* obj["literal"] -> putstring_for("literal", Hash, 0) */
 		else if (ci->mid == idAREF && ci->orig_argc == 1) {
-		    VALUE *old_operands = iobj->operands;
-
-		    iobj->insn_id = BIN(opt_aref_with);
-		    iobj->operand_size = insn_len(iobj->insn_id) - 1;
-
-		    iobj->operands = (VALUE *)compile_data_alloc(iseq,
-					iobj->operand_size * sizeof(VALUE));
-		    iobj->operands[0] = (VALUE)ci;
-		    iobj->operands[1] = old_operands[0];
-
-		    REMOVE_ELEM((LINK_ELEMENT *)niobj);
+		    swap_putstring_for_arg(iseq, iobj, BOP_AREF,
+				    STRING_REDEFINED_OP_FLAG, rb_cHash, 0);
 		}
 	    }
 	}
@@ -5245,23 +5262,33 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	int asgnflag;
 
 	/* optimization shortcut
-	 *   obj["literal"] = value -> opt_aset_with(obj, "literal", value)
+	 *   obj["literal"] = val -> send(obj, :[]=, putstring_for("lit"), val)
+	 * TODO: ideally this should be done via peephole optimization,
+	 * but it gets tricky as the assigned value can be anything so
+	 * the peephole would need to do a lot of scanning.
 	 */
 	if (node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
 	    nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 2 &&
 	    nd_type(node->nd_args->nd_head) == NODE_STR)
 	{
 	    VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
+	    VALUE recv_info = rb_ary_new_from_args(5, str,
+		    INT2FIX(BOP_ASET), INT2FIX(HASH_REDEFINED_OP_FLAG),
+		    rb_cHash, INT2FIX(0));
+
 	    node->nd_args->nd_head->nd_lit = str;
-	    iseq_add_mark_object(iseq, str);
+	    iseq_add_mark_object(iseq, recv_info);
+	    if (!poped) {
+		ADD_INSN(ret, line, putnil);
+	    }
 	    COMPILE(ret, "recv", node->nd_recv);
+	    ADD_INSN1(ret, line, putstring_for, recv_info);
 	    COMPILE(ret, "value", node->nd_args->nd_next->nd_head);
 	    if (!poped) {
-		ADD_INSN(ret, line, swap);
-		ADD_INSN1(ret, line, topn, INT2FIX(1));
+		ADD_INSN1(ret, line, setn, INT2FIX(3));
 	    }
-	    ADD_INSN2(ret, line, opt_aset_with,
-		      new_callinfo(iseq, idASET, 2, 0, 0), str);
+	    flag = VM_CALL_ARGS_SKIP_SETUP;
+	    ADD_SEND_R(ret, line, node->nd_mid, 2, 0, INT2FIX(flag));
 	    ADD_INSN(ret, line, pop);
 	    break;
 	}
diff --git a/insns.def b/insns.def
index bfa11a9..46d9393 100644
--- a/insns.def
+++ b/insns.def
@@ -356,6 +356,51 @@ putstring
 
 /**
   @c put
+  @e put string val. string will be copied lazily depending on conditions
+ */
+DEFINE_INSN
+putstring_for
+(VALUE recv_info)
+()
+(VALUE val)
+{
+    /*
+     * recv_info:
+     * 0 - str
+     * 1 - basic operator flag (BOP_*)
+     * 2 - redefined flag (*_REDEFINED_OP_FLAG)
+     * 3 - Class (optimized receiver class) or Symbol (method name)
+     * 4 - stack offset (Fixnum), only present if [3] is a Class
+     */
+    const VALUE *ri = RARRAY_CONST_PTR(recv_info);
+    enum ruby_basic_operators bop = FIX2INT(ri[1]);
+    int redef_flag = FIX2INT(ri[2]);
+    VALUE msym_or_class = ri[3];
+
+    val = ri[0]; /* hopefully, this is the only val assignment we need */
+
+    /* check if the receiver is an on-stack object: */
+    if (!SYMBOL_P(msym_or_class)) {
+	VALUE recv = TOPN(FIX2INT(ri[4]));
+
+	if (SPECIAL_CONST_P(recv) ||
+		RBASIC_CLASS(recv) != msym_or_class ||
+		!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+	    /* bad, somebody redefined an optimized method, slow path: */
+	    val = rb_str_resurrect(val);
+	}
+    }
+    else { /* receiver is the string literal itself (e.g. "str".freeze) */
+	if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+	    /* bad, somebody redefined an optimized method, slow path: */
+	    val = rb_str_resurrect(val);
+	    val = rb_funcall(val, SYM2ID(msym_or_class), 0);
+	}
+    }
+}
+
+/**
+  @c put
   @e put concatenate strings
   @j スタックトップの文字列を n 個連結し,結果をスタックにプッシュする。
  */
@@ -999,20 +1044,6 @@ send
     CALL_METHOD(ci);
 }
 
-DEFINE_INSN
-opt_str_freeze
-(VALUE str)
-()
-(VALUE val)
-{
-    if (BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG)) {
-	val = str;
-    }
-    else {
-	val = rb_funcall(rb_str_resurrect(str), idFreeze, 0);
-    }
-}
-
 /**
   @c optimize
   @e Invoke method without block, splat
@@ -1903,49 +1934,6 @@ opt_aset
 
 /**
   @c optimize
-  @e recv[str] = set
-  @j 最適化された recv[str] = set。
- */
-DEFINE_INSN
-opt_aset_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv, VALUE val)
-(VALUE val)
-{
-    if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
-	rb_hash_aset(recv, key, val);
-    }
-    else {
-	PUSH(recv);
-	PUSH(rb_str_resurrect(key));
-	PUSH(val);
-	CALL_SIMPLE_METHOD(recv);
-    }
-}
-
-/**
-  @c optimize
-  @e recv[str]
-  @j 最適化された recv[str]。
- */
-DEFINE_INSN
-opt_aref_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv)
-(VALUE val)
-{
-    if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
-	val = rb_hash_aref(recv, key);
-    }
-    else {
-	PUSH(recv);
-	PUSH(rb_str_resurrect(key));
-	CALL_SIMPLE_METHOD(recv);
-    }
-}
-
-/**
-  @c optimize
   @e optimized length
   @j 最適化された recv.length()。
  */
-- 
EW


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH] compile.c: optimize << and == using putstring_for
  2014-10-09  1:51 ` [PATCH 2/2] add generic and flexible putstring_for insn Eric Wong
@ 2014-10-09  4:03   ` Eric Wong
  0 siblings, 0 replies; 3+ messages in thread
From: Eric Wong @ 2014-10-09  4:03 UTC (permalink / raw)
  To: spew

This optimizes `obj << "literal"' and `obj == "literal"' calls.

Note: `"literal" == obj' is not optimized yet.  We may have the
same problem as opt_aset_with in the peephole optimizer.
---
 benchmark/bm_vm2_strcat.rb |  7 ++++++
 benchmark/bm_vm2_streq1.rb |  6 +++++
 compile.c                  | 14 +++++++++++-
 test/ruby/test_string.rb   | 56 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 benchmark/bm_vm2_strcat.rb
 create mode 100644 benchmark/bm_vm2_streq1.rb

diff --git a/benchmark/bm_vm2_strcat.rb b/benchmark/bm_vm2_strcat.rb
new file mode 100644
index 0000000..b25ac6e
--- /dev/null
+++ b/benchmark/bm_vm2_strcat.rb
@@ -0,0 +1,7 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  str << "const"
+  str.clear
+end
diff --git a/benchmark/bm_vm2_streq1.rb b/benchmark/bm_vm2_streq1.rb
new file mode 100644
index 0000000..2a4b0f8
--- /dev/null
+++ b/benchmark/bm_vm2_streq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  foo == "literal"
+end
diff --git a/compile.c b/compile.c
index 10db05d..6e83c3c 100644
--- a/compile.c
+++ b/compile.c
@@ -1865,7 +1865,19 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 		/* obj["literal"] -> putstring_for("literal", Hash, 0) */
 		else if (ci->mid == idAREF && ci->orig_argc == 1) {
 		    swap_putstring_for_arg(iseq, iobj, BOP_AREF,
-				    STRING_REDEFINED_OP_FLAG, rb_cHash, 0);
+				    HASH_REDEFINED_OP_FLAG, rb_cHash, 0);
+		}
+
+		/* optimize allocation: obj == "lit" */
+		else if (ci->mid == idEq && ci->orig_argc == 1) {
+		    swap_putstring_for_arg(iseq, iobj, BOP_EQ,
+				    STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+		}
+
+		/* optimize allocation: obj << "lit" */
+		else if (ci->mid == idLTLT && ci->orig_argc == 1) {
+		    swap_putstring_for_arg(iseq, iobj, BOP_LTLT,
+				    STRING_REDEFINED_OP_FLAG, rb_cString, 0);
 		}
 	    }
 	}
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 4dc790f..4165e97 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2279,6 +2279,33 @@ class TestString < Test::Unit::TestCase
     end;
   end if [0].pack("l!").bytesize < [nil].pack("p").bytesize
   # enable only when string size range is smaller than memory space
+
+  def test_opt_strcat_with
+    assert_separately([], <<-RUBY)
+      class String
+        undef <<
+        def <<(str)
+          "overridden"
+        end
+      end
+      assert_equal("overridden", "" << "foo")
+      foo = "foo"
+      assert_equal("overridden", foo << "bar")
+    RUBY
+
+    if @cls == String
+      nr = 10
+      recv = ""
+      before = GC.stat(:total_allocated_objects)
+      nr.times { recv << "constant" }
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal "constant" * nr, recv
+
+      before = GC.stat(:total_allocated_objects)
+      nr.times { "recv" << "constant" }
+      assert_equal before + nr, GC.stat(:total_allocated_objects)
+    end
+  end
 end
 
 class TestString2 < TestString
@@ -2286,4 +2313,33 @@ class TestString2 < TestString
     super
     @cls = S2
   end
+
+  def test_opt_streq1
+    assert_separately([], <<-RUBY)
+      class String
+        undef ==
+        def ==(str)
+          :TROO
+        end
+      end
+      assert_equal(:TROO, ("foo" == "foo"))
+    RUBY
+
+    if @cls == String
+      nr = 10
+
+      recv = "something"
+      res = []
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv == "constant") }
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal [ false ], res.uniq!
+
+      res.clear
+      before = GC.stat(:total_allocated_objects)
+      nr.times { res << (recv == "something") }
+      assert_equal before, GC.stat(:total_allocated_objects)
+      assert_equal [ true ], res.uniq!
+    end
+  end
 end
-- 
EW


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-10-09  4:03 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-09  1:50 [PATCH 1/2] compile.c: move "literal" optimizations to peephole optimize Eric Wong
2014-10-09  1:51 ` [PATCH 2/2] add generic and flexible putstring_for insn Eric Wong
2014-10-09  4:03   ` [PATCH] compile.c: optimize << and == using putstring_for Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).