dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH v2] micro-optimize case dispatch even harder
@ 2015-12-08  5:08 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2015-12-08  5:08 UTC (permalink / raw)
  To: spew

I noticed these optimizations while working on r52931.

By using a bare hash table, we avoid the overhead of rb_hash_*
functions as well as the cost of translating FIX2INT for jump
labels.

This also reduces GC overhead, as the iseq mark array no longer
carries redundant objects nor the Hash object.

* vm_core.h (CDHASH): change to st_table *
* benchmark/bm_vm2_case_small.rb: new benchmark
* compile.c (struct cdhash_set_label_struct): change to st_table *
  (cdhash_set_label_i): adjust to store int in cdhash
  (iseq_set_sequence): adjust for type change
  (when_vals): ditto
  (iseq_compile_each): ditto
  (iseq_build_from_ary_body): ditto
  (cdhash_from_ary): extract from iseq_build_from_ary_body
* insns.def (opt_case_dispatch): ditto
* iseq.c (cdhash_each): ditto
  (iseq_data_to_ary): ditto
* test/ruby/test_iseq.rb (test_memleak): new test
  [ruby-core:71931] [Feature #11786]

Slightly smaller improvements with leak fix, but it could be noise
for the small cases.

2015-12-08 05:06:17 +0000
target 0: a (ruby 2.3.0dev (2015-12-08 trunk 52938) [x86_64-linux]) at "/home/ew/rrrr/b/ruby"
target 1: b (ruby 2.3.0dev (2015-12-08 master 52938) [x86_64-linux]
last_commit=compile.c: wrap literal object) at "/home/ew/ruby/b/ruby"

benchmark results:

minimum results in each 5 measurements.
Execution time (sec)
name	a	b
loop_whileloop2	0.103	0.103
vm2_case*	0.075	0.064
vm2_case_lit*	0.560	0.561
vm2_case_small*	0.061	0.055

Speedup ratio: compare with the result of `a' (greater is better)
name	b
loop_whileloop2	1.000
vm2_case*	1.169
vm2_case_lit*	1.000
vm2_case_small*	1.119
---
 benchmark/bm_vm2_case_small.rb |  9 +++++
 compile.c                      | 79 ++++++++++++++++++++++--------------------
 insns.def                      |  6 ++--
 iseq.c                         |  6 ++--
 test/ruby/test_iseq.rb         | 22 ++++++++++++
 vm_core.h                      |  2 +-
 6 files changed, 79 insertions(+), 45 deletions(-)
 create mode 100644 benchmark/bm_vm2_case_small.rb

diff --git a/benchmark/bm_vm2_case_small.rb b/benchmark/bm_vm2_case_small.rb
new file mode 100644
index 0000000..b009a3e
--- /dev/null
+++ b/benchmark/bm_vm2_case_small.rb
@@ -0,0 +1,9 @@
+i = 0
+while i<6_000_000 # while loop 2
+  case :foo
+  when :foo
+    i += 1
+  else
+    raise
+  end
+end
diff --git a/compile.c b/compile.c
index bb0a07c..896e70c 100644
--- a/compile.c
+++ b/compile.c
@@ -1492,17 +1492,17 @@ static const struct st_hash_type cdhash_type = {
 };
 
 struct cdhash_set_label_struct {
-    VALUE hash;
+    st_table *cdh;
     int pos;
     int len;
 };
 
 static int
-cdhash_set_label_i(VALUE key, VALUE val, void *ptr)
+cdhash_set_label_i(VALUE key, long val, void *ptr)
 {
-    struct cdhash_set_label_struct *data = (struct cdhash_set_label_struct *)ptr;
+    struct cdhash_set_label_struct *d = (struct cdhash_set_label_struct *)ptr;
     LABEL *lobj = (LABEL *)(val & ~1);
-    rb_hash_aset(data->hash, key, INT2FIX(lobj->position - (data->pos+data->len)));
+    st_insert(d->cdh, key, lobj->position - (d->pos + d->len));
     return ST_CONTINUE;
 }
 
@@ -1630,15 +1630,14 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *anchor)
 			}
 		      case TS_CDHASH:
 			{
-			    VALUE map = operands[j];
-			    struct cdhash_set_label_struct data;
-                            data.hash = map;
-                            data.pos = code_index;
-                            data.len = len;
-			    rb_hash_foreach(map, cdhash_set_label_i, (VALUE)&data);
-
-			    freeze_hide_obj(map);
-			    generated_iseq[code_index + 1 + j] = map;
+			    st_table *cdh = (st_table *)operands[j];
+			    struct cdhash_set_label_struct d;
+			    d.cdh = cdh;
+			    d.pos = code_index;
+			    d.len = len;
+
+			    st_foreach(cdh, cdhash_set_label_i, (st_data_t)&d);
+			    generated_iseq[code_index + 1 + j] = (VALUE)cdh;
 			    break;
 			}
 		      case TS_LINDEX:
@@ -2932,7 +2931,8 @@ case_when_optimizable_literal(NODE * node)
 }
 
 static int
-when_vals(rb_iseq_t *iseq, LINK_ANCHOR *cond_seq, NODE *vals, LABEL *l1, int only_special_literals, VALUE literals)
+when_vals(rb_iseq_t *iseq, LINK_ANCHOR *cond_seq, NODE *vals, LABEL *l1,
+	  int only_special_literals, st_table *literals)
 {
     while (vals) {
 	NODE* val = vals->nd_head;
@@ -2942,11 +2942,11 @@ when_vals(rb_iseq_t *iseq, LINK_ANCHOR *cond_seq, NODE *vals, LABEL *l1, int onl
 	    only_special_literals = 0;
 	}
 	else {
-	    if (rb_hash_lookup(literals, lit) != Qnil) {
+	    if (st_lookup(literals, lit, 0)) {
 		rb_compile_warning(ruby_sourcefile, nd_line(val), "duplicated when clause is ignored");
 	    }
 	    else {
-		rb_hash_aset(literals, lit, (VALUE)(l1) | 1);
+		st_add_direct(literals, lit, (VALUE)(l1) | 1);
 	    }
 	}
 
@@ -3703,14 +3703,13 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	DECL_ANCHOR(body_seq);
 	DECL_ANCHOR(cond_seq);
 	int only_special_literals = 1;
-	VALUE literals = rb_hash_new();
+	st_table *literals = st_init_table(&cdhash_type);
+	VALUE lit_wrapper = Data_Wrap_Struct(0, 0, st_free_table, literals);
 
 	INIT_ANCHOR(head);
 	INIT_ANCHOR(body_seq);
 	INIT_ANCHOR(cond_seq);
 
-	rb_hash_tbl_raw(literals)->type = &cdhash_type;
-
 	if (node->nd_head == 0) {
 	    COMPILE_(ret, "when", node->nd_body, poped);
 	    break;
@@ -3789,8 +3788,7 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
 	}
 
 	if (only_special_literals) {
-	    iseq_add_mark_object(iseq, literals);
-
+	    iseq_add_mark_object(iseq, lit_wrapper);
 	    ADD_INSN(ret, nd_line(tempnode), dup);
 	    ADD_INSN2(ret, nd_line(tempnode), opt_case_dispatch, literals, elselabel);
 	    LABEL_REF(elselabel);
@@ -6298,6 +6296,27 @@ iseq_build_callinfo_from_hash(rb_iseq_t *iseq, VALUE op)
     return (VALUE)new_callinfo(iseq, mid, orig_argc, flag, kw_arg, (flag & VM_CALL_ARGS_SIMPLE) == 0);
 }
 
+static VALUE
+cdhash_from_ary(rb_iseq_t *iseq, st_table *labels_table, VALUE op)
+{
+    int i;
+    st_table *literals = st_init_table(&cdhash_type);
+    VALUE lit_wrap = Data_Wrap_Struct(0, 0, st_free_table, literals);
+
+    iseq_add_mark_object(iseq, lit_wrap);
+    op = rb_convert_type(op, T_ARRAY, "Array", "to_ary");
+    for (i = 0; i < RARRAY_LEN(op); i += 2) {
+	VALUE key = RARRAY_AREF(op, i);
+	VALUE sym = RARRAY_AREF(op, i + 1);
+	LABEL *label = register_label(iseq, labels_table, sym);
+
+	st_add_direct(literals, key, (VALUE)label | 1);
+    }
+    RB_GC_GUARD(op);
+
+    return (VALUE)literals;
+}
+
 static int
 iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *anchor,
 			 VALUE body, VALUE labels_wrapper)
@@ -6401,23 +6420,7 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *anchor,
 						  "Symbol", "to_sym");
 			break;
 		      case TS_CDHASH:
-			{
-			    int i;
-			    VALUE map = rb_hash_new();
-
-			    rb_hash_tbl_raw(map)->type = &cdhash_type;
-			    op = rb_convert_type(op, T_ARRAY, "Array", "to_ary");
-			    for (i=0; i<RARRAY_LEN(op); i+=2) {
-				VALUE key = RARRAY_AREF(op, i);
-				VALUE sym = RARRAY_AREF(op, i+1);
-				LABEL *label =
-				  register_label(iseq, labels_table, sym);
-				rb_hash_aset(map, key, (VALUE)label | 1);
-			    }
-			    RB_GC_GUARD(op);
-			    argv[j] = map;
-			    rb_iseq_add_mark_object(iseq, map);
-			}
+			argv[j] = cdhash_from_ary(iseq, labels_table, op);
 			break;
 		      case TS_FUNCPTR:
 			{
diff --git a/insns.def b/insns.def
index 3c4d980..4de3a6d 100644
--- a/insns.def
+++ b/insns.def
@@ -1253,7 +1253,7 @@ once
  */
 DEFINE_INSN
 opt_case_dispatch
-(CDHASH hash, OFFSET else_offset)
+(CDHASH cdh, OFFSET else_offset)
 (..., VALUE key)
 () // inc += -1;
 {
@@ -1281,8 +1281,8 @@ opt_case_dispatch
 				   FALSE_REDEFINED_OP_FLAG  |
 				   STRING_REDEFINED_OP_FLAG)) {
 	    st_data_t val;
-	    if (st_lookup(RHASH_TBL_RAW(hash), key, &val)) {
-		JUMP(FIX2INT((VALUE)val));
+	    if (st_lookup(cdh, key, &val)) {
+		JUMP(val);
 	    }
 	    else {
 		JUMP(else_offset);
diff --git a/iseq.c b/iseq.c
index 17bfee4..de08beb 100644
--- a/iseq.c
+++ b/iseq.c
@@ -1754,7 +1754,7 @@ static int
 cdhash_each(VALUE key, VALUE value, VALUE ary)
 {
     rb_ary_push(ary, obj_resurrect(key));
-    rb_ary_push(ary, value);
+    rb_ary_push(ary, INT2FIX(value));
     return ST_CONTINUE;
 }
 
@@ -1961,11 +1961,11 @@ iseq_data_to_ary(const rb_iseq_t *iseq)
 		break;
 	      case TS_CDHASH:
 		{
-		    VALUE hash = *seq;
+		    st_table *cdh = (st_table *)*seq;
 		    VALUE val = rb_ary_new();
 		    int i;
 
-		    rb_hash_foreach(hash, cdhash_each, val);
+		    st_foreach(cdh, cdhash_each, val);
 
 		    for (i=0; i<RARRAY_LEN(val); i+=2) {
 			VALUE pos = FIX2INT(rb_ary_entry(val, i+1));
diff --git a/test/ruby/test_iseq.rb b/test/ruby/test_iseq.rb
index 4f7616a..2855872 100644
--- a/test/ruby/test_iseq.rb
+++ b/test/ruby/test_iseq.rb
@@ -185,4 +185,26 @@ def test_safe_call_chain
     labels = body.select {|op, arg| op == :branchnil}.map {|op, arg| arg}
     assert_equal(1, labels.uniq.size)
   end
+
+  def test_memleak
+    assert_no_memory_leak([], <<-'end;', '35_000.times(&doit)')
+      doit = proc { RubyVM::InstructionSequence.compile('
+        case $foo
+        when "0" then 0
+        when "1" then 1
+        when "2" then 2
+        when "3" then 3
+        when "4" then 4
+        when "5" then 5
+        when "6" then 6
+        when "7" then 7
+        when "8" then 8
+        when "9" then 9
+        else
+          -1
+        end
+      ') }
+      10.times(&doit)
+    end;
+  end
 end
diff --git a/vm_core.h b/vm_core.h
index aecbe61..c700382 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -917,7 +917,7 @@ typedef struct rb_call_cache *CALL_CACHE;
 
 void rb_vm_change_state(void);
 
-typedef VALUE CDHASH;
+typedef st_table * CDHASH;
 
 #ifndef FUNC_FASTCALL
 #define FUNC_FASTCALL(x) x
-- 
EW


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2015-12-08  5:08 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-12-08  5:08 [PATCH v2] micro-optimize case dispatch even harder Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).