* [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize
@ 2014-10-18 2:41 Eric Wong
2014-10-18 2:41 ` [PATCH 02/18] add generic and flexible opt_str_lit insn Eric Wong
` (16 more replies)
0 siblings, 17 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:41 UTC (permalink / raw)
To: spew
`"literal".freeze', `obj["literal"]', and `obj["literal"] = val'
are all peephole optimizations and not appropriate for
iseq_compile_each.
---
compile.c | 66 ++++++++++++++++++++++--------------------
test/-ext-/symbol/test_type.rb | 1 +
test/objspace/test_objspace.rb | 1 +
test/ruby/envutil.rb | 10 +++++++
test/ruby/test_hash.rb | 2 ++
test/ruby/test_iseq.rb | 1 +
test/ruby/test_string.rb | 7 +++++
7 files changed, 57 insertions(+), 31 deletions(-)
diff --git a/compile.c b/compile.c
index 8df7acf..bd8f75f 100644
--- a/compile.c
+++ b/compile.c
@@ -1819,6 +1819,41 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
}
}
}
+
+ /* string literal optimizations */
+ if (iobj->insn_id == BIN(putstring)) {
+ INSN *niobj = (INSN *)get_next_insn((INSN *)list);
+
+ if (niobj && niobj->insn_id == BIN(send)) {
+ rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
+
+ if (ci->blockiseq == 0 &&
+ (ci->flag & ~VM_CALL_ARGS_SKIP_SETUP) == 0) {
+
+ /* "literal".freeze -> opt_str_freeze("literal") */
+ if (ci->mid == idFreeze && ci->orig_argc == 0) {
+ iobj->insn_id = BIN(opt_str_freeze);
+ REMOVE_ELEM((LINK_ELEMENT *)niobj);
+ }
+
+ /* obj["literal"] -> opt_aref_with(obj, "literal") */
+ else if (ci->mid == idAREF && ci->orig_argc == 1) {
+ VALUE *old_operands = iobj->operands;
+
+ iobj->insn_id = BIN(opt_aref_with);
+ iobj->operand_size = insn_len(iobj->insn_id) - 1;
+
+ iobj->operands = (VALUE *)compile_data_alloc(iseq,
+ iobj->operand_size * sizeof(VALUE));
+ iobj->operands[0] = (VALUE)ci;
+ iobj->operands[1] = old_operands[0];
+
+ REMOVE_ELEM((LINK_ELEMENT *)niobj);
+ }
+ }
+ }
+ }
+
return COMPILE_OK;
}
@@ -4238,37 +4273,6 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
break;
}
case NODE_CALL:
- /* optimization shortcut
- * "literal".freeze -> opt_str_freeze("literal")
- */
- if (node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
- node->nd_mid == idFreeze && node->nd_args == NULL)
- {
- VALUE str = rb_fstring(node->nd_recv->nd_lit);
- iseq_add_mark_object(iseq, str);
- ADD_INSN1(ret, line, opt_str_freeze, str);
- if (poped) {
- ADD_INSN(ret, line, pop);
- }
- break;
- }
- /* optimization shortcut
- * obj["literal"] -> opt_aref_with(obj, "literal")
- */
- if (node->nd_mid == idAREF && !private_recv_p(node) && node->nd_args &&
- nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 1 &&
- nd_type(node->nd_args->nd_head) == NODE_STR)
- {
- VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
- node->nd_args->nd_head->nd_lit = str;
- COMPILE(ret, "recv", node->nd_recv);
- ADD_INSN2(ret, line, opt_aref_with,
- new_callinfo(iseq, idAREF, 1, 0, 0), str);
- if (poped) {
- ADD_INSN(ret, line, pop);
- }
- break;
- }
case NODE_FCALL:
case NODE_VCALL:{ /* VCALL: variable or call */
/*
diff --git a/test/-ext-/symbol/test_type.rb b/test/-ext-/symbol/test_type.rb
index f1749f5..5bd79b8 100644
--- a/test/-ext-/symbol/test_type.rb
+++ b/test/-ext-/symbol/test_type.rb
@@ -4,6 +4,7 @@ require "-test-/symbol"
module Test_Symbol
class TestType < Test::Unit::TestCase
def test_id2str_fstring_bug9171
+ require_compile_option(:peephole_optimization)
fstr = eval("# encoding: us-ascii
'foobar'.freeze")
assert_same fstr, Bug::Symbol.id2str(:foobar)
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index 8a5ed34..faacf48 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -195,6 +195,7 @@ class TestObjSpace < Test::Unit::TestCase
end
def test_dump_flags
+ require_compile_option(:peephole_optimization)
info = ObjectSpace.dump("foo".freeze)
assert_match /"wb_protected":true, "old":true, "long_lived":true, "marked":true/, info
assert_match /"fstring":true/, info
diff --git a/test/ruby/envutil.rb b/test/ruby/envutil.rb
index f5fbb7c..bddaf82 100644
--- a/test/ruby/envutil.rb
+++ b/test/ruby/envutil.rb
@@ -520,6 +520,16 @@ eom
end
end
+ def require_compile_option(opt)
+ case RubyVM::InstructionSequence.compile_option[opt]
+ when true
+ when false
+ skip(":#{opt} disabled")
+ else
+ raise ArgumentError, "unrecognized compile option: #{opt.inspect}"
+ end
+ end
+
class << (AssertFile = Struct.new(:failure_message).new)
include Assertions
def assert_file_predicate(predicate, *args)
diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index 4431552..bb7e8b5 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -216,6 +216,7 @@ class TestHash < Test::Unit::TestCase
end
def test_AREF_fstring_key
+ require_compile_option(:peephole_optimization)
h = {"abc" => 1}
before = GC.stat(:total_allocated_objects)
5.times{ h["abc"] }
@@ -230,6 +231,7 @@ class TestHash < Test::Unit::TestCase
end
def test_NEWHASH_fstring_key
+ require_compile_option(:peephole_optimization)
a = {"ABC" => :t}
b = {"ABC" => :t}
assert_same a.keys[0], b.keys[0]
diff --git a/test/ruby/test_iseq.rb b/test/ruby/test_iseq.rb
index 94a814c..ac1c417 100644
--- a/test/ruby/test_iseq.rb
+++ b/test/ruby/test_iseq.rb
@@ -118,6 +118,7 @@ class TestISeq < Test::Unit::TestCase
end
def test_label_fstring
+ require_compile_option(:peephole_optimization)
c = Class.new{ def foobar() end }
a, b = eval("# encoding: us-ascii\n'foobar'.freeze"),
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 543c138..7c8dc12 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1910,6 +1910,13 @@ class TestString < Test::Unit::TestCase
}
end
+ def test_literal_freeze
+ require_compile_option(:peephole_optimization)
+ before = GC.stat(:total_allocated_objects)
+ 5.times { "".freeze }
+ assert_equal before, GC.stat(:total_allocated_objects)
+ end
+
class S2 < String
end
def test_str_new4
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 02/18] add generic and flexible opt_str_lit insn
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
@ 2014-10-18 2:41 ` Eric Wong
2014-10-18 2:41 ` [PATCH 03/18] compile.c: optimize << and == using putstring_for Eric Wong
` (15 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:41 UTC (permalink / raw)
To: spew
A new opt_str_lit instruction may replace all current uses of:
* opt_str_freeze
* opt_aref_with
* opt_aset_with
This new instruction should also be usable to implement new
optimizations to avoid rb_str_resurrect.
Optimizations for literal hash["literal"] (aref/lookup) and
"literal".freeze are easily moved to the peephole optimizer.
However, it seems easier to optimize `hash["literal"] = val'
in iseq_compile_each right now.
This reduces performance compared to the old opt_aref_with and
opt_aset_with instructions slightly, but is more elegant in
avoiding special cases. We may decide to resurrect opt_aref_with
and opt_aset_with if we want to recover the small performance loss
and can accept a bigger VM loop.
"".freeze performance is probably not interesting to anyone :)
benchmark results:
minimum results in each 5 measurements.
Execution time (sec)
name 2.1.3 trunk built
loop_whileloop2 0.106 0.106 0.106
vm2_hash_aref_lit* 0.503 0.162 0.192
vm2_hash_aset_lit* 0.587 0.214 0.241
Speedup ratio: compare with the result of `2.1.3' (greater is better)
name trunk built
loop_whileloop2 1.000 0.998
vm2_hash_aref_lit* 3.099 2.621
vm2_hash_aset_lit* 2.741 2.435
raw data:
[["loop_whileloop2",
[[0.10656525194644928,
0.10594194941222668,
0.10586611740291119,
0.1066869841888547,
0.10577277280390263],
[0.1066260114312172,
0.11153125017881393,
0.1057466259226203,
0.10648809000849724,
0.10654668044298887],
[0.10622590780258179,
0.10608386714011431,
0.105999612249434,
0.10603114310652018,
0.10603212099522352]]],
["vm2_hash_aref_lit",
[[0.6088160118088126,
0.6084766369313002,
0.6114963851869106,
0.6098374016582966,
0.6085139447823167],
[0.27083833049982786,
0.2680424079298973,
0.27930730395019054,
0.26884936541318893,
0.26798537466675043],
[0.303865535184741,
0.31264861673116684,
0.2977857915684581,
0.29944207333028316,
0.30319013725966215]]],
["vm2_hash_aset_lit",
[[0.6943842126056552,
0.6933871945366263,
0.694433419033885,
0.6946460604667664,
0.6931405374780297],
[0.32219766546040773,
0.3211979949846864,
0.32000005338341,
0.3278619237244129,
0.3314230963587761],
[0.3476126240566373,
0.3471973007544875,
0.3666635127738118,
0.3474232777953148,
0.3474671710282564]]]]
---
benchmark/bm_vm2_hash_aref_lit.rb | 6 +++
benchmark/bm_vm2_hash_aset_lit.rb | 6 +++
compile.c | 74 ++++++++++++++++++---------
insns.def | 102 +++++++++++++++++---------------------
4 files changed, 108 insertions(+), 80 deletions(-)
create mode 100644 benchmark/bm_vm2_hash_aref_lit.rb
create mode 100644 benchmark/bm_vm2_hash_aset_lit.rb
diff --git a/benchmark/bm_vm2_hash_aref_lit.rb b/benchmark/bm_vm2_hash_aref_lit.rb
new file mode 100644
index 0000000..a6d4d12
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aref_lit.rb
@@ -0,0 +1,6 @@
+h = { "foo" => nil }
+i = 0
+while i<6_000_000 # while loop 2
+ i += 1
+ h["foo"]
+end
diff --git a/benchmark/bm_vm2_hash_aset_lit.rb b/benchmark/bm_vm2_hash_aset_lit.rb
new file mode 100644
index 0000000..58339ec
--- /dev/null
+++ b/benchmark/bm_vm2_hash_aset_lit.rb
@@ -0,0 +1,6 @@
+h = {}
+i = 0
+while i<6_000_000 # while loop 2
+ i += 1
+ h["foo"] = nil
+end
diff --git a/compile.c b/compile.c
index bd8f75f..b5d3152 100644
--- a/compile.c
+++ b/compile.c
@@ -1703,6 +1703,32 @@ get_prev_insn(INSN *iobj)
return 0;
}
+static void
+opt_str_lit_recv(rb_iseq_t *iseq, INSN *iobj,
+ enum ruby_basic_operators bop, int redef_flag, ID mid)
+{
+ VALUE recv_info = rb_ary_new_from_args(4,
+ iobj->operands[0], INT2FIX(bop), INT2FIX(redef_flag), ID2SYM(mid));
+ OBJ_FREEZE(recv_info);
+ iobj->insn_id = BIN(opt_str_lit);
+ iobj->operands[0] = recv_info;
+ iseq_add_mark_object(iseq, recv_info); /* XXX check if needed */
+}
+
+static void
+opt_str_lit_arg(rb_iseq_t *iseq, INSN *iobj,
+ enum ruby_basic_operators bop, int redef_flag,
+ VALUE klass, int recv_off)
+{
+ VALUE recv_info = rb_ary_new_from_args(5,
+ iobj->operands[0], INT2FIX(bop), INT2FIX(redef_flag),
+ klass, INT2FIX(recv_off));
+ OBJ_FREEZE(recv_info);
+ iobj->insn_id = BIN(opt_str_lit);
+ iobj->operands[0] = recv_info;
+ iseq_add_mark_object(iseq, recv_info); /* XXX check if needed */
+}
+
static int
iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcallopt)
{
@@ -1827,28 +1853,19 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
if (niobj && niobj->insn_id == BIN(send)) {
rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
- if (ci->blockiseq == 0 &&
- (ci->flag & ~VM_CALL_ARGS_SKIP_SETUP) == 0) {
+ if (ci->blockiseq == 0 && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) {
- /* "literal".freeze -> opt_str_freeze("literal") */
+ /* "literal".freeze -> opt_str_lit("literal", :freeze, ...) */
if (ci->mid == idFreeze && ci->orig_argc == 0) {
- iobj->insn_id = BIN(opt_str_freeze);
+ opt_str_lit_recv(iseq, iobj, BOP_FREEZE,
+ STRING_REDEFINED_OP_FLAG, ci->mid);
REMOVE_ELEM((LINK_ELEMENT *)niobj);
}
- /* obj["literal"] -> opt_aref_with(obj, "literal") */
+ /* obj["literal"] -> opt_str_lit("literal", Hash, 0) */
else if (ci->mid == idAREF && ci->orig_argc == 1) {
- VALUE *old_operands = iobj->operands;
-
- iobj->insn_id = BIN(opt_aref_with);
- iobj->operand_size = insn_len(iobj->insn_id) - 1;
-
- iobj->operands = (VALUE *)compile_data_alloc(iseq,
- iobj->operand_size * sizeof(VALUE));
- iobj->operands[0] = (VALUE)ci;
- iobj->operands[1] = old_operands[0];
-
- REMOVE_ELEM((LINK_ELEMENT *)niobj);
+ opt_str_lit_arg(iseq, iobj, BOP_AREF,
+ HASH_REDEFINED_OP_FLAG, rb_cHash, 0);
}
}
}
@@ -5245,23 +5262,34 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
int asgnflag;
/* optimization shortcut
- * obj["literal"] = value -> opt_aset_with(obj, "literal", value)
+ * obj["literal"] = val -> send(obj, :[]=, opt_str_lit("lit"), val)
+ * TODO: ideally this should be done inside iseq_peephole_optimize,
+ * but that would require a lot of scanning as the `val' (2nd arg)
+ * is of variable distance between the :putstring and :send insns
*/
- if (node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
+ if (iseq->compile_data->option->peephole_optimization &&
+ node->nd_mid == idASET && !private_recv_p(node) && node->nd_args &&
nd_type(node->nd_args) == NODE_ARRAY && node->nd_args->nd_alen == 2 &&
nd_type(node->nd_args->nd_head) == NODE_STR)
{
VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
+ VALUE recv_info = rb_ary_new_from_args(5, str,
+ INT2FIX(BOP_ASET), INT2FIX(HASH_REDEFINED_OP_FLAG),
+ rb_cHash, INT2FIX(0));
+
node->nd_args->nd_head->nd_lit = str;
- iseq_add_mark_object(iseq, str);
+ iseq_add_mark_object(iseq, recv_info);
+ if (!poped) {
+ ADD_INSN(ret, line, putnil);
+ }
COMPILE(ret, "recv", node->nd_recv);
+ ADD_INSN1(ret, line, opt_str_lit, recv_info);
COMPILE(ret, "value", node->nd_args->nd_next->nd_head);
if (!poped) {
- ADD_INSN(ret, line, swap);
- ADD_INSN1(ret, line, topn, INT2FIX(1));
+ ADD_INSN1(ret, line, setn, INT2FIX(3));
}
- ADD_INSN2(ret, line, opt_aset_with,
- new_callinfo(iseq, idASET, 2, 0, 0), str);
+ flag = VM_CALL_ARGS_SKIP_SETUP;
+ ADD_SEND_R(ret, line, node->nd_mid, 2, 0, INT2FIX(flag));
ADD_INSN(ret, line, pop);
break;
}
diff --git a/insns.def b/insns.def
index bfa11a9..9a98bf8 100644
--- a/insns.def
+++ b/insns.def
@@ -356,6 +356,51 @@ putstring
/**
@c put
+ @e put string val. string may be created depending on recv_info conditions
+ */
+DEFINE_INSN
+opt_str_lit
+(VALUE recv_info)
+()
+(VALUE val)
+{
+ /*
+ * recv_info:
+ * 0 - str
+ * 1 - basic operator flag (BOP_*)
+ * 2 - redefined flag (*_REDEFINED_OP_FLAG)
+ * 3 - Class (optimized receiver class) or Symbol (method name)
+ * 4 - stack offset (Fixint), only present if [3] is a Class
+ */
+ const VALUE *ri = RARRAY_CONST_PTR(recv_info);
+ enum ruby_basic_operators bop = FIX2INT(ri[1]);
+ int redef_flag = FIX2INT(ri[2]);
+ VALUE msym_or_class = ri[3];
+
+ val = ri[0]; /* hopefully, this is the only val assignment we need */
+
+ /* check if the receiver is an on-stack object: */
+ if (!SYMBOL_P(msym_or_class)) {
+ VALUE recv = TOPN(FIX2INT(ri[4]));
+
+ if (SPECIAL_CONST_P(recv) ||
+ RBASIC_CLASS(recv) != msym_or_class ||
+ !BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+ /* bad, somebody redefined an optimized method, slow path: */
+ val = rb_str_resurrect(val);
+ }
+ }
+ else { /* receiver is the string literal itself (e.g. "str".freeze) */
+ if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+ /* bad, somebody redefined an optimized method, slow path: */
+ val = rb_str_resurrect(val);
+ val = rb_funcall(val, SYM2ID(msym_or_class), 0);
+ }
+ }
+}
+
+/**
+ @c put
@e put concatenate strings
@j スタックトップの文字列を n 個連結し,結果をスタックにプッシュする。
*/
@@ -999,20 +1044,6 @@ send
CALL_METHOD(ci);
}
-DEFINE_INSN
-opt_str_freeze
-(VALUE str)
-()
-(VALUE val)
-{
- if (BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG)) {
- val = str;
- }
- else {
- val = rb_funcall(rb_str_resurrect(str), idFreeze, 0);
- }
-}
-
/**
@c optimize
@e Invoke method without block, splat
@@ -1903,49 +1934,6 @@ opt_aset
/**
@c optimize
- @e recv[str] = set
- @j 最適化された recv[str] = set。
- */
-DEFINE_INSN
-opt_aset_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv, VALUE val)
-(VALUE val)
-{
- if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
- rb_hash_aset(recv, key, val);
- }
- else {
- PUSH(recv);
- PUSH(rb_str_resurrect(key));
- PUSH(val);
- CALL_SIMPLE_METHOD(recv);
- }
-}
-
-/**
- @c optimize
- @e recv[str]
- @j 最適化された recv[str]。
- */
-DEFINE_INSN
-opt_aref_with
-(CALL_INFO ci, VALUE key)
-(VALUE recv)
-(VALUE val)
-{
- if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
- val = rb_hash_aref(recv, key);
- }
- else {
- PUSH(recv);
- PUSH(rb_str_resurrect(key));
- CALL_SIMPLE_METHOD(recv);
- }
-}
-
-/**
- @c optimize
@e optimized length
@j 最適化された recv.length()。
*/
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 03/18] compile.c: optimize << and == using putstring_for
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
2014-10-18 2:41 ` [PATCH 02/18] add generic and flexible opt_str_lit insn Eric Wong
@ 2014-10-18 2:41 ` Eric Wong
2014-10-18 2:41 ` [PATCH 04/18] opt_str_lit: further optimizations and cleanups Eric Wong
` (14 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:41 UTC (permalink / raw)
To: spew
This optimizes `obj << "literal"' and `obj == "literal"' calls.
Note: `"literal" == obj' is not optimized yet. We may have the
same problem as opt_aset_with in the peephole optimizer.
---
benchmark/bm_vm2_strcat.rb | 7 ++++++
benchmark/bm_vm2_streq1.rb | 6 +++++
compile.c | 12 ++++++++++
test/ruby/test_string.rb | 56 ++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 81 insertions(+)
create mode 100644 benchmark/bm_vm2_strcat.rb
create mode 100644 benchmark/bm_vm2_streq1.rb
diff --git a/benchmark/bm_vm2_strcat.rb b/benchmark/bm_vm2_strcat.rb
new file mode 100644
index 0000000..b25ac6e
--- /dev/null
+++ b/benchmark/bm_vm2_strcat.rb
@@ -0,0 +1,7 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ str << "const"
+ str.clear
+end
diff --git a/benchmark/bm_vm2_streq1.rb b/benchmark/bm_vm2_streq1.rb
new file mode 100644
index 0000000..2a4b0f8
--- /dev/null
+++ b/benchmark/bm_vm2_streq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ foo == "literal"
+end
diff --git a/compile.c b/compile.c
index b5d3152..d301579 100644
--- a/compile.c
+++ b/compile.c
@@ -1867,6 +1867,18 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
opt_str_lit_arg(iseq, iobj, BOP_AREF,
HASH_REDEFINED_OP_FLAG, rb_cHash, 0);
}
+
+ /* optimize allocation: obj == "lit" */
+ else if (ci->mid == idEq && ci->orig_argc == 1) {
+ opt_str_lit_arg(iseq, iobj, BOP_EQ,
+ STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ }
+
+ /* optimize allocation: obj << "lit" */
+ else if (ci->mid == idLTLT && ci->orig_argc == 1) {
+ opt_str_lit_arg(iseq, iobj, BOP_LTLT,
+ STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ }
}
}
}
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 7c8dc12..56b2e2d 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2281,6 +2281,33 @@ class TestString < Test::Unit::TestCase
end;
end if [0].pack("l!").bytesize < [nil].pack("p").bytesize
# enable only when string size range is smaller than memory space
+
+ def test_opt_strcat_with
+ assert_separately([], <<-RUBY)
+ class String
+ undef <<
+ def <<(str)
+ "overridden"
+ end
+ end
+ assert_equal("overridden", "" << "foo")
+ foo = "foo"
+ assert_equal("overridden", foo << "bar")
+ RUBY
+
+ if @cls == String
+ nr = 10
+ recv = ""
+ before = GC.stat(:total_allocated_objects)
+ nr.times { recv << "constant" }
+ assert_equal before, GC.stat(:total_allocated_objects)
+ assert_equal "constant" * nr, recv
+
+ before = GC.stat(:total_allocated_objects)
+ nr.times { "recv" << "constant" }
+ assert_equal before + nr, GC.stat(:total_allocated_objects)
+ end
+ end
end
class TestString2 < TestString
@@ -2288,4 +2315,33 @@ class TestString2 < TestString
super
@cls = S2
end
+
+ def test_opt_streq1
+ assert_separately([], <<-RUBY)
+ class String
+ undef ==
+ def ==(str)
+ :TROO
+ end
+ end
+ assert_equal(:TROO, ("foo" == "foo"))
+ RUBY
+
+ if @cls == String
+ nr = 10
+
+ recv = "something"
+ res = []
+ before = GC.stat(:total_allocated_objects)
+ nr.times { res << (recv == "constant") }
+ assert_equal before, GC.stat(:total_allocated_objects)
+ assert_equal [ false ], res.uniq!
+
+ res.clear
+ before = GC.stat(:total_allocated_objects)
+ nr.times { res << (recv == "something") }
+ assert_equal before, GC.stat(:total_allocated_objects)
+ assert_equal [ true ], res.uniq!
+ end
+ end
end
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 04/18] opt_str_lit: further optimizations and cleanups
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
2014-10-18 2:41 ` [PATCH 02/18] add generic and flexible opt_str_lit insn Eric Wong
2014-10-18 2:41 ` [PATCH 03/18] compile.c: optimize << and == using putstring_for Eric Wong
@ 2014-10-18 2:41 ` Eric Wong
2014-10-18 2:41 ` [PATCH 05/18] opt_str_lit: optimize allocations for +, %, * and === calls Eric Wong
` (13 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:41 UTC (permalink / raw)
To: spew
Optimize the following:
* "string" == obj
* "string" != obj
* obj == "string"
* "string".size
* "string".length
`"string" == obj' should have roughly the same performance as the more
common `obj == "string"' comparison. Likewise for cases where != is
called on a literal string. These are all easy-to-optimize without
requiring any new VM instructions.
The size and length optimizations are probably not needed
frequently, but they are trivial and require no new VM instructions,
either.
---
benchmark/bm_vm2_streq2.rb | 6 ++
compile.c | 150 ++++++++++++++++++++++++++++++++-------------
insns.def | 40 +++++++-----
test/ruby/test_string.rb | 47 ++++++++++++--
4 files changed, 181 insertions(+), 62 deletions(-)
create mode 100644 benchmark/bm_vm2_streq2.rb
diff --git a/benchmark/bm_vm2_streq2.rb b/benchmark/bm_vm2_streq2.rb
new file mode 100644
index 0000000..986020d
--- /dev/null
+++ b/benchmark/bm_vm2_streq2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ "literal" == foo
+end
diff --git a/compile.c b/compile.c
index d301579..50f9777 100644
--- a/compile.c
+++ b/compile.c
@@ -1703,30 +1703,43 @@ get_prev_insn(INSN *iobj)
return 0;
}
-static void
-opt_str_lit_recv(rb_iseq_t *iseq, INSN *iobj,
+static VALUE
+new_recvinfo_for_put(rb_iseq_t *iseq, VALUE str,
+ enum ruby_basic_operators bop, int redef_flag)
+{
+ VALUE ri = rb_ary_new_from_args(3, str, INT2FIX(bop), INT2FIX(redef_flag));
+
+ hide_obj(ri);
+ iseq_add_mark_object(iseq, ri);
+
+ return ri;
+}
+
+static VALUE
+new_recvinfo_for_call(rb_iseq_t *iseq, VALUE str,
enum ruby_basic_operators bop, int redef_flag, ID mid)
{
- VALUE recv_info = rb_ary_new_from_args(4,
- iobj->operands[0], INT2FIX(bop), INT2FIX(redef_flag), ID2SYM(mid));
- OBJ_FREEZE(recv_info);
- iobj->insn_id = BIN(opt_str_lit);
- iobj->operands[0] = recv_info;
- iseq_add_mark_object(iseq, recv_info); /* XXX check if needed */
+ VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(bop), INT2FIX(redef_flag),
+ ID2SYM(mid));
+
+ hide_obj(ri);
+ iseq_add_mark_object(iseq, ri);
+
+ return ri;
}
-static void
-opt_str_lit_arg(rb_iseq_t *iseq, INSN *iobj,
+static VALUE
+new_recvinfo_for_arg(rb_iseq_t *iseq, VALUE str,
enum ruby_basic_operators bop, int redef_flag,
VALUE klass, int recv_off)
{
- VALUE recv_info = rb_ary_new_from_args(5,
- iobj->operands[0], INT2FIX(bop), INT2FIX(redef_flag),
- klass, INT2FIX(recv_off));
- OBJ_FREEZE(recv_info);
- iobj->insn_id = BIN(opt_str_lit);
- iobj->operands[0] = recv_info;
- iseq_add_mark_object(iseq, recv_info); /* XXX check if needed */
+ VALUE ri = rb_ary_new_from_args(5, str, INT2FIX(bop), INT2FIX(redef_flag),
+ klass, INT2FIX(recv_off));
+
+ hide_obj(ri);
+ iseq_add_mark_object(iseq, ri);
+
+ return ri;
}
static int
@@ -1853,31 +1866,61 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
if (niobj && niobj->insn_id == BIN(send)) {
rb_call_info_t *ci = (rb_call_info_t *)niobj->operands[0];
- if (ci->blockiseq == 0 && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) {
-
- /* "literal".freeze -> opt_str_lit("literal", :freeze, ...) */
- if (ci->mid == idFreeze && ci->orig_argc == 0) {
- opt_str_lit_recv(iseq, iobj, BOP_FREEZE,
- STRING_REDEFINED_OP_FLAG, ci->mid);
- REMOVE_ELEM((LINK_ELEMENT *)niobj);
- }
-
- /* obj["literal"] -> opt_str_lit("literal", Hash, 0) */
- else if (ci->mid == idAREF && ci->orig_argc == 1) {
- opt_str_lit_arg(iseq, iobj, BOP_AREF,
+ if (!ci->blockiseq && !(ci->flag & ~VM_CALL_ARGS_SKIP_SETUP)) {
+ VALUE ri = Qfalse;
+ VALUE str = iobj->operands[0];
+
+ switch (ci->orig_argc) {
+ case 0:
+ /*
+ * optimize:
+ * "literal".freeze
+ * "literal".size
+ * "literal".length
+ */
+ switch (ci->mid) {
+ case idFreeze:
+ ri = new_recvinfo_for_call(iseq, str, BOP_FREEZE,
+ STRING_REDEFINED_OP_FLAG, ci->mid);
+ REMOVE_ELEM((LINK_ELEMENT *)niobj);
+ break;
+ case idSize:
+ ri = new_recvinfo_for_put(iseq, str, BOP_SIZE,
+ STRING_REDEFINED_OP_FLAG);
+ break;
+ case idLength:
+ ri = new_recvinfo_for_put(iseq, str, BOP_LENGTH,
+ STRING_REDEFINED_OP_FLAG);
+ break;
+ }
+ break;
+ case 1:
+ switch (ci->mid) {
+ case idAREF:
+ /* optimize allocation: obj["lit"] */
+ ri = new_recvinfo_for_arg(iseq, str, BOP_AREF,
HASH_REDEFINED_OP_FLAG, rb_cHash, 0);
- }
-
- /* optimize allocation: obj == "lit" */
- else if (ci->mid == idEq && ci->orig_argc == 1) {
- opt_str_lit_arg(iseq, iobj, BOP_EQ,
+ break;
+ case idEq:
+ /* optimize allocation: obj == "lit" */
+ ri = new_recvinfo_for_arg(iseq, str, BOP_EQ,
STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ break;
+ case idNeq:
+ /* optimize allocation: obj != "lit" */
+ ri = new_recvinfo_for_arg(iseq, str, BOP_NEQ,
+ STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ break;
+ case idLTLT:
+ /* optimize allocation: obj << "lit" */
+ ri = new_recvinfo_for_arg(iseq, str, BOP_LTLT,
+ STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ break;
+ }
}
-
- /* optimize allocation: obj << "lit" */
- else if (ci->mid == idLTLT && ci->orig_argc == 1) {
- opt_str_lit_arg(iseq, iobj, BOP_LTLT,
- STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ if (ri != Qfalse) {
+ iobj->insn_id = BIN(opt_str_lit);
+ iobj->operands[0] = ri;
}
}
}
@@ -4385,7 +4428,27 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
#endif
/* receiver */
if (type == NODE_CALL) {
- COMPILE(recv, "recv", node->nd_recv);
+ /*
+ * optimize:
+ * "yoda" == other -> opt_str_lit("yoda").send(:==, other)
+ * "yoda" != other -> opt_str_lit("yoda").send(:!=, other)
+ */
+ if (iseq->compile_data->option->peephole_optimization &&
+ (mid == idEq || mid == idNeq) &&
+ !private_recv_p(node) &&
+ node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
+ node->nd_args && nd_type(node->nd_args) == NODE_ARRAY &&
+ node->nd_args->nd_alen == 1)
+ {
+ VALUE yoda = rb_fstring(node->nd_recv->nd_lit);
+ VALUE recv_info = new_recvinfo_for_put(iseq, yoda,
+ BOP_EQ, STRING_REDEFINED_OP_FLAG);
+
+ node->nd_recv->nd_lit = yoda;
+ ADD_INSN1(recv, line, opt_str_lit, recv_info);
+ } else {
+ COMPILE(recv, "recv", node->nd_recv);
+ }
}
else if (type == NODE_FCALL || type == NODE_VCALL) {
ADD_CALL_RECEIVER(recv, line);
@@ -5285,12 +5348,11 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
nd_type(node->nd_args->nd_head) == NODE_STR)
{
VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
- VALUE recv_info = rb_ary_new_from_args(5, str,
- INT2FIX(BOP_ASET), INT2FIX(HASH_REDEFINED_OP_FLAG),
- rb_cHash, INT2FIX(0));
+ VALUE recv_info = new_recvinfo_for_arg(iseq, str,
+ BOP_ASET, HASH_REDEFINED_OP_FLAG,
+ rb_cHash, 0);
node->nd_args->nd_head->nd_lit = str;
- iseq_add_mark_object(iseq, recv_info);
if (!poped) {
ADD_INSN(ret, line, putnil);
}
diff --git a/insns.def b/insns.def
index 9a98bf8..f6740f8 100644
--- a/insns.def
+++ b/insns.def
@@ -369,32 +369,44 @@ opt_str_lit
* 0 - str
* 1 - basic operator flag (BOP_*)
* 2 - redefined flag (*_REDEFINED_OP_FLAG)
+ * optional:
* 3 - Class (optimized receiver class) or Symbol (method name)
- * 4 - stack offset (Fixint), only present if [3] is a Class
+ * 4 - stack offset (Fixint), only present if [3] is a Class,
+ * -1 stack offset means receiver is the frozen string literal itself
*/
const VALUE *ri = RARRAY_CONST_PTR(recv_info);
+ long len = RARRAY_LEN(recv_info);
enum ruby_basic_operators bop = FIX2INT(ri[1]);
int redef_flag = FIX2INT(ri[2]);
- VALUE msym_or_class = ri[3];
val = ri[0]; /* hopefully, this is the only val assignment we need */
-
- /* check if the receiver is an on-stack object: */
- if (!SYMBOL_P(msym_or_class)) {
- VALUE recv = TOPN(FIX2INT(ri[4]));
-
- if (SPECIAL_CONST_P(recv) ||
- RBASIC_CLASS(recv) != msym_or_class ||
- !BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
- /* bad, somebody redefined an optimized method, slow path: */
- val = rb_str_resurrect(val);
+ if (len > 3) {
+ VALUE msym_or_class = ri[3];
+
+ /* check if the receiver is an on-stack object: */
+ if (!SYMBOL_P(msym_or_class)) {
+ int n = FIX2INT(ri[4]);
+ VALUE recv = n < 0 ? val : TOPN(n);
+
+ if (SPECIAL_CONST_P(recv) ||
+ RBASIC_CLASS(recv) != msym_or_class ||
+ !BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+ /* bad, somebody redefined an optimized method, slow path: */
+ val = rb_str_resurrect(val);
+ }
+ }
+ else { /* receiver is the string literal itself (e.g. "str".freeze) */
+ if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+ /* bad, somebody redefined an optimized method, slow path: */
+ val = rb_str_resurrect(val);
+ val = rb_funcall(val, SYM2ID(msym_or_class), 0);
+ }
}
}
- else { /* receiver is the string literal itself (e.g. "str".freeze) */
+ else { /* string lit is receiver, but there are args */
if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
/* bad, somebody redefined an optimized method, slow path: */
val = rb_str_resurrect(val);
- val = rb_funcall(val, SYM2ID(msym_or_class), 0);
}
}
}
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 56b2e2d..90ccfd9 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2316,7 +2316,7 @@ class TestString2 < TestString
@cls = S2
end
- def test_opt_streq1
+ def test_opt_str_eq_neq
assert_separately([], <<-RUBY)
class String
undef ==
@@ -2324,7 +2324,36 @@ class TestString2 < TestString
:TROO
end
end
- assert_equal(:TROO, ("foo" == "foo"))
+ foo = "foo"
+ assert_equal(:TROO, (foo == "foo"), 'string == "peephole 2nd pass"')
+ assert_equal(:TROO, ("foo" == foo), '"yoda 1st pass" == string')
+ RUBY
+
+ assert_separately([], <<-RUBY)
+ class String
+ undef !=
+ def !=(str)
+ :NOT
+ end
+ end
+ foo = ""
+ assert_equal(:NOT, ("foo" != foo), '"yoda 1st pass" != string')
+ assert_equal(:NOT, (foo != "foo"), 'string != "peephole 2nd pass"')
+ RUBY
+
+ assert_separately([], <<-RUBY)
+ class String
+ undef size
+ undef length
+ def size
+ 42
+ end
+ def length
+ 42
+ end
+ end
+ assert_equal(42, "".size, 'lit string size')
+ assert_equal(42, "".length, 'lit string size')
RUBY
if @cls == String
@@ -2333,13 +2362,23 @@ class TestString2 < TestString
recv = "something"
res = []
before = GC.stat(:total_allocated_objects)
- nr.times { res << (recv == "constant") }
+ nr.times { res << (recv == "constant") } # opt_streq1
+ nr.times { res << ("constant" == recv) } # opt_streq2
+ nr.times { res << ("something " != recv) } # 1st pass peephole
+ nr.times { res << (recv != "something") } # 2nd pass peephole
assert_equal before, GC.stat(:total_allocated_objects)
assert_equal [ false ], res.uniq!
res.clear
before = GC.stat(:total_allocated_objects)
- nr.times { res << (recv == "something") }
+ nr.times { res << (recv == "something") } # opt_streq1
+ nr.times { res << ("something" == recv) } # opt_streq2
+ nr.times { res << ("constant" != recv) } # 1st pass peephole
+ nr.times { res << (recv != "constant") } # 2nd pass peephole
+ nr.times { res << ("a" != "b") } # 1st pass peephole
+ nr.times { res << ("a" == "a") } # 1st pass peephole
+ nr.times { res << ("".size == 0) } # 2nd pass peephole
+ nr.times { res << ("".length == 0) } # 2nd pass peephole
assert_equal before, GC.stat(:total_allocated_objects)
assert_equal [ true ], res.uniq!
end
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 05/18] opt_str_lit: optimize allocations for +, %, * and === calls
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (2 preceding siblings ...)
2014-10-18 2:41 ` [PATCH 04/18] opt_str_lit: further optimizations and cleanups Eric Wong
@ 2014-10-18 2:41 ` Eric Wong
2014-10-18 2:41 ` [PATCH 06/18] vm: automatically define optimized method enums Eric Wong
` (12 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:41 UTC (permalink / raw)
To: spew
While experienced Rubyists know String#<< and interpolated
strings can avoid unnecessary object overhead, String#+ is
often easier-to-type and the first choice for some Rubyists.
Avoid penalizing users of String#+ unnecessarily
(but keep in mind using String#<< or interpolated strings
can further reduce overhead).
It is also common to use literal format strings with '%',
so optimize allocations away from those calls.
Some users may use === directly for comparing strings, so
avoid allocating if they happen to use string literals.
While we're at it, optimizing allocations for String#* is utterly
trivial (one extra case), and I use `"lit" * Fixnum' sometimes.
---
benchmark/bm_vm2_streqq1.rb | 6 +++++
benchmark/bm_vm2_streqq2.rb | 6 +++++
benchmark/bm_vm2_strfmt.rb | 5 ++++
benchmark/bm_vm2_strplus1.rb | 6 +++++
benchmark/bm_vm2_strplus2.rb | 6 +++++
compile.c | 32 ++++++++++++++++++++++--
test/ruby/test_string.rb | 59 +++++++++++++++++++++++++++++++++++++++++++-
7 files changed, 117 insertions(+), 3 deletions(-)
create mode 100644 benchmark/bm_vm2_streqq1.rb
create mode 100644 benchmark/bm_vm2_streqq2.rb
create mode 100644 benchmark/bm_vm2_strfmt.rb
create mode 100644 benchmark/bm_vm2_strplus1.rb
create mode 100644 benchmark/bm_vm2_strplus2.rb
diff --git a/benchmark/bm_vm2_streqq1.rb b/benchmark/bm_vm2_streqq1.rb
new file mode 100644
index 0000000..9183466
--- /dev/null
+++ b/benchmark/bm_vm2_streqq1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ foo === "literal"
+end
diff --git a/benchmark/bm_vm2_streqq2.rb b/benchmark/bm_vm2_streqq2.rb
new file mode 100644
index 0000000..f48a9cd
--- /dev/null
+++ b/benchmark/bm_vm2_streqq2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "literal"
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ "literal" === foo
+end
diff --git a/benchmark/bm_vm2_strfmt.rb b/benchmark/bm_vm2_strfmt.rb
new file mode 100644
index 0000000..efb88b6
--- /dev/null
+++ b/benchmark/bm_vm2_strfmt.rb
@@ -0,0 +1,5 @@
+i = 0
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ "%d" % i
+end
diff --git a/benchmark/bm_vm2_strplus1.rb b/benchmark/bm_vm2_strplus1.rb
new file mode 100644
index 0000000..714efb8
--- /dev/null
+++ b/benchmark/bm_vm2_strplus1.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "a"
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ foo + "b"
+end
diff --git a/benchmark/bm_vm2_strplus2.rb b/benchmark/bm_vm2_strplus2.rb
new file mode 100644
index 0000000..c7f91ed
--- /dev/null
+++ b/benchmark/bm_vm2_strplus2.rb
@@ -0,0 +1,6 @@
+i = 0
+foo = "a"
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ "b" + foo
+end
diff --git a/compile.c b/compile.c
index 50f9777..a40831c 100644
--- a/compile.c
+++ b/compile.c
@@ -1916,6 +1916,16 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
ri = new_recvinfo_for_arg(iseq, str, BOP_LTLT,
STRING_REDEFINED_OP_FLAG, rb_cString, 0);
break;
+ case idPLUS:
+ /* optimize allocation: obj + "lit" */
+ ri = new_recvinfo_for_arg(iseq, str, BOP_PLUS,
+ STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ break;
+ case idEqq:
+ /* optimize allocation: obj === "lit" */
+ ri = new_recvinfo_for_arg(iseq, str, BOP_EQQ,
+ STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ break;
}
}
if (ri != Qfalse) {
@@ -3203,6 +3213,20 @@ build_postexe_iseq(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *body)
return Qnil;
}
+static enum ruby_basic_operators
+opt_str_lit_recv_bop(ID mid)
+{
+ switch (mid) {
+ case idEq: return BOP_EQ;
+ case idNeq: return BOP_NEQ;
+ case idPLUS: return BOP_PLUS;
+ case idMULT: return BOP_MULT;
+ case idMOD: return BOP_MOD;
+ case idEqq: return BOP_EQQ;
+ }
+ return BOP_LAST_;
+}
+
/**
compile each node
@@ -4428,13 +4452,17 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
#endif
/* receiver */
if (type == NODE_CALL) {
+ enum ruby_basic_operators bop;
/*
* optimize:
* "yoda" == other -> opt_str_lit("yoda").send(:==, other)
* "yoda" != other -> opt_str_lit("yoda").send(:!=, other)
+ * "str" + other -> opt_str_lit("str").send(:+, other)
+ * "str" * other -> opt_str_lit("str").send(:*, other)
+ * "fmt" % args -> opt_str_lit("str").send(:%, other)
*/
if (iseq->compile_data->option->peephole_optimization &&
- (mid == idEq || mid == idNeq) &&
+ ((bop = opt_str_lit_recv_bop(mid)) != BOP_LAST_) &&
!private_recv_p(node) &&
node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
node->nd_args && nd_type(node->nd_args) == NODE_ARRAY &&
@@ -4442,7 +4470,7 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
{
VALUE yoda = rb_fstring(node->nd_recv->nd_lit);
VALUE recv_info = new_recvinfo_for_put(iseq, yoda,
- BOP_EQ, STRING_REDEFINED_OP_FLAG);
+ bop, STRING_REDEFINED_OP_FLAG);
node->nd_recv->nd_lit = yoda;
ADD_INSN1(recv, line, opt_str_lit, recv_info);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 90ccfd9..a2abe40 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2316,7 +2316,7 @@ class TestString2 < TestString
@cls = S2
end
- def test_opt_str_eq_neq
+ def test_opt_str_lit
assert_separately([], <<-RUBY)
class String
undef ==
@@ -2356,6 +2356,40 @@ class TestString2 < TestString
assert_equal(42, "".length, 'lit string size')
RUBY
+ assert_separately([], <<-RUBY)
+ class String
+ undef +
+ def +(other)
+ :plus
+ end
+ end
+ foo = "a"
+ assert_equal(:plus, "" + foo, 'lit plus')
+ assert_equal(:plus, foo + "", 'plus lit')
+ RUBY
+
+ assert_separately([], <<-RUBY)
+ class String
+ undef *
+ def *(other)
+ :mult
+ end
+ end
+ assert_equal(:mult, "x" * 3, 'lit mult')
+ RUBY
+
+ assert_separately([], <<-RUBY)
+ class String
+ undef ===
+ def ===(other)
+ other
+ end
+ end
+ str = "y"
+ assert_equal(false, "x" === false, 'lit threequal')
+ assert_equal("x", str === "x", 'threequal lit')
+ RUBY
+
if @cls == String
nr = 10
@@ -2365,6 +2399,8 @@ class TestString2 < TestString
nr.times { res << (recv == "constant") } # opt_streq1
nr.times { res << ("constant" == recv) } # opt_streq2
nr.times { res << ("something " != recv) } # 1st pass peephole
+ nr.times { res << ("constant" == recv) } # opt_streq2
+ nr.times { res << ("constant" === recv) } # opt_streqq2
nr.times { res << (recv != "something") } # 2nd pass peephole
assert_equal before, GC.stat(:total_allocated_objects)
assert_equal [ false ], res.uniq!
@@ -2373,6 +2409,8 @@ class TestString2 < TestString
before = GC.stat(:total_allocated_objects)
nr.times { res << (recv == "something") } # opt_streq1
nr.times { res << ("something" == recv) } # opt_streq2
+ nr.times { res << ("something" === recv) } # opt_streqq2
+ nr.times { res << (recv === "something") } # opt_streqq1
nr.times { res << ("constant" != recv) } # 1st pass peephole
nr.times { res << (recv != "constant") } # 2nd pass peephole
nr.times { res << ("a" != "b") } # 1st pass peephole
@@ -2381,6 +2419,25 @@ class TestString2 < TestString
nr.times { res << ("".length == 0) } # 2nd pass peephole
assert_equal before, GC.stat(:total_allocated_objects)
assert_equal [ true ], res.uniq!
+
+ # :+ optimizations
+ res.clear
+ before = GC.stat(:total_allocated_objects)
+ nr.times { res << ("foo" + recv) }
+ assert_equal before + nr, GC.stat(:total_allocated_objects)
+ assert_equal [ "foosomething" ], res.uniq!
+
+ res.clear
+ before = GC.stat(:total_allocated_objects)
+ nr.times { res << (recv + "foo") }
+ assert_equal before + nr, GC.stat(:total_allocated_objects)
+ assert_equal [ "somethingfoo" ], res.uniq!
+
+ res.clear
+ before = GC.stat(:total_allocated_objects)
+ nr.times { res << ('a' * 3) }
+ assert_equal before + nr, GC.stat(:total_allocated_objects)
+ assert_equal [ "aaa" ], res.uniq!
end
end
end
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 06/18] vm: automatically define optimized method enums
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (3 preceding siblings ...)
2014-10-18 2:41 ` [PATCH 05/18] opt_str_lit: optimize allocations for +, %, * and === calls Eric Wong
@ 2014-10-18 2:41 ` Eric Wong
2014-10-18 2:41 ` [PATCH 07/18] fix mismerge Eric Wong
` (11 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:41 UTC (permalink / raw)
To: spew
This allows more flexible management of optimized methods instead
of constraining us to a 2-dimensional bitmap array where some
classes may have many optimized methods and others have few.
---
common.mk | 18 +++++-
compile.c | 81 ++++++++++++---------------
defs/opt_method.def | 49 ++++++++++++++++
insns.def | 129 +++++++++++++++++++------------------------
template/opt_method.h.tmpl | 71 ++++++++++++++++++++++++
template/opt_method.inc.tmpl | 49 ++++++++++++++++
vm.c | 67 +++-------------------
vm_core.h | 44 ++-------------
vm_insnhelper.c | 8 +--
vm_insnhelper.h | 25 ++++++++-
10 files changed, 318 insertions(+), 223 deletions(-)
create mode 100644 defs/opt_method.def
create mode 100644 template/opt_method.h.tmpl
create mode 100644 template/opt_method.inc.tmpl
diff --git a/common.mk b/common.mk
index ce01aca..0a533a5 100644
--- a/common.mk
+++ b/common.mk
@@ -639,7 +639,7 @@ PROBES_H_INCLUDES = {$(VPATH)}probes.h
VM_CORE_H_INCLUDES = {$(VPATH)}vm_core.h {$(VPATH)}thread_$(THREAD_MODEL).h \
{$(VPATH)}node.h {$(VPATH)}method.h {$(VPATH)}ruby_atomic.h \
{$(VPATH)}vm_debug.h {$(VPATH)}id.h {$(VPATH)}thread_native.h \
- $(CCAN_LIST_INCLUDES)
+ $(CCAN_LIST_INCLUDES) {$(VPATH)}opt_method.h
###
@@ -826,7 +826,7 @@ vm.$(OBJEXT): {$(VPATH)}vm.c {$(VPATH)}gc.h {$(VPATH)}iseq.h \
$(VM_CORE_H_INCLUDES) {$(VPATH)}vm_method.c {$(VPATH)}vm_eval.c \
{$(VPATH)}vm_insnhelper.c {$(VPATH)}vm_insnhelper.h {$(VPATH)}vm_exec.c \
{$(VPATH)}vm_exec.h {$(VPATH)}insns.def {$(VPATH)}vmtc.inc \
- {$(VPATH)}vm.inc {$(VPATH)}insns.inc \
+ {$(VPATH)}vm.inc {$(VPATH)}insns.inc {$(VPATH)}opt_method.inc \
{$(VPATH)}internal.h {$(VPATH)}vm.h {$(VPATH)}constant.h \
$(PROBES_H_INCLUDES) {$(VPATH)}probes_helper.h {$(VPATH)}vm_opts.h
vm_dump.$(OBJEXT): {$(VPATH)}vm_dump.c $(RUBY_H_INCLUDES) \
@@ -931,6 +931,20 @@ incs: $(INSNS) {$(VPATH)}node_name.inc {$(VPATH)}encdb.h {$(VPATH)}transdb.h {$(
insns: $(INSNS)
+opt_method.h: $(srcdir)/tool/generic_erb.rb \
+ $(srcdir)/template/opt_method.h.tmpl \
+ $(srcdir)/defs/opt_method.def
+ $(ECHO) generating $@
+ $(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
+ $(srcdir)/template/opt_method.h.tmpl
+
+opt_method.inc: $(srcdir)/tool/generic_erb.rb \
+ $(srcdir)/template/opt_method.inc.tmpl \
+ $(srcdir)/defs/opt_method.def
+ $(ECHO) generating $@
+ $(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
+ $(srcdir)/template/opt_method.inc.tmpl
+
id.h: $(srcdir)/tool/generic_erb.rb $(srcdir)/template/id.h.tmpl $(srcdir)/defs/id.def
$(ECHO) generating $@
$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb --output=$@ \
diff --git a/compile.c b/compile.c
index a40831c..7d6771b 100644
--- a/compile.c
+++ b/compile.c
@@ -1703,11 +1703,12 @@ get_prev_insn(INSN *iobj)
return 0;
}
+#define new_recvinfo_for_put(iseq,str,mid,klass) \
+ new_recvinfo_for_put_(iseq,str,OM_##mid##__##klass)
static VALUE
-new_recvinfo_for_put(rb_iseq_t *iseq, VALUE str,
- enum ruby_basic_operators bop, int redef_flag)
+new_recvinfo_for_put_(rb_iseq_t *iseq, VALUE str, enum ruby_optimized_method om)
{
- VALUE ri = rb_ary_new_from_args(3, str, INT2FIX(bop), INT2FIX(redef_flag));
+ VALUE ri = rb_ary_new_from_args(2, str, INT2FIX(om));
hide_obj(ri);
iseq_add_mark_object(iseq, ri);
@@ -1715,12 +1716,13 @@ new_recvinfo_for_put(rb_iseq_t *iseq, VALUE str,
return ri;
}
+#define new_recvinfo_for_call(iseq,str,mid,klass) \
+ new_recvinfo_for_call_((iseq),(str),OM_##mid##__##klass,(mid))
static VALUE
-new_recvinfo_for_call(rb_iseq_t *iseq, VALUE str,
- enum ruby_basic_operators bop, int redef_flag, ID mid)
+new_recvinfo_for_call_(rb_iseq_t *iseq, VALUE str,
+ enum ruby_optimized_method om, ID mid)
{
- VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(bop), INT2FIX(redef_flag),
- ID2SYM(mid));
+ VALUE ri = rb_ary_new_from_args(3, str, INT2FIX(om), ID2SYM(mid));
hide_obj(ri);
iseq_add_mark_object(iseq, ri);
@@ -1728,12 +1730,13 @@ new_recvinfo_for_call(rb_iseq_t *iseq, VALUE str,
return ri;
}
+#define new_recvinfo_for_arg(iseq,str,mid,klass,off) \
+ new_recvinfo_for_arg_((iseq),(str),OM_##mid##__##klass,(rb_c##klass),(off))
static VALUE
-new_recvinfo_for_arg(rb_iseq_t *iseq, VALUE str,
- enum ruby_basic_operators bop, int redef_flag,
- VALUE klass, int recv_off)
+new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str,
+ enum ruby_optimized_method om, VALUE klass, int recv_off)
{
- VALUE ri = rb_ary_new_from_args(5, str, INT2FIX(bop), INT2FIX(redef_flag),
+ VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(om),
klass, INT2FIX(recv_off));
hide_obj(ri);
@@ -1880,17 +1883,14 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
*/
switch (ci->mid) {
case idFreeze:
- ri = new_recvinfo_for_call(iseq, str, BOP_FREEZE,
- STRING_REDEFINED_OP_FLAG, ci->mid);
+ ri = new_recvinfo_for_call(iseq, str, idFreeze, String);
REMOVE_ELEM((LINK_ELEMENT *)niobj);
break;
case idSize:
- ri = new_recvinfo_for_put(iseq, str, BOP_SIZE,
- STRING_REDEFINED_OP_FLAG);
+ ri = new_recvinfo_for_put(iseq, str, idSize, String);
break;
case idLength:
- ri = new_recvinfo_for_put(iseq, str, BOP_LENGTH,
- STRING_REDEFINED_OP_FLAG);
+ ri = new_recvinfo_for_put(iseq, str, idLength, String);
break;
}
break;
@@ -1898,33 +1898,27 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
switch (ci->mid) {
case idAREF:
/* optimize allocation: obj["lit"] */
- ri = new_recvinfo_for_arg(iseq, str, BOP_AREF,
- HASH_REDEFINED_OP_FLAG, rb_cHash, 0);
+ ri = new_recvinfo_for_arg(iseq, str, idAREF, Hash, 0);
break;
case idEq:
/* optimize allocation: obj == "lit" */
- ri = new_recvinfo_for_arg(iseq, str, BOP_EQ,
- STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ ri = new_recvinfo_for_arg(iseq, str, idEq, String, 0);
break;
case idNeq:
/* optimize allocation: obj != "lit" */
- ri = new_recvinfo_for_arg(iseq, str, BOP_NEQ,
- STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ ri = new_recvinfo_for_arg(iseq, str, idNeq, String, 0);
break;
case idLTLT:
/* optimize allocation: obj << "lit" */
- ri = new_recvinfo_for_arg(iseq, str, BOP_LTLT,
- STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ ri = new_recvinfo_for_arg(iseq, str, idLTLT, String, 0);
break;
case idPLUS:
/* optimize allocation: obj + "lit" */
- ri = new_recvinfo_for_arg(iseq, str, BOP_PLUS,
- STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ ri = new_recvinfo_for_arg(iseq, str, idPLUS, String, 0);
break;
case idEqq:
/* optimize allocation: obj === "lit" */
- ri = new_recvinfo_for_arg(iseq, str, BOP_EQQ,
- STRING_REDEFINED_OP_FLAG, rb_cString, 0);
+ ri = new_recvinfo_for_arg(iseq, str, idEqq, String, 0);
break;
}
}
@@ -3213,18 +3207,18 @@ build_postexe_iseq(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *body)
return Qnil;
}
-static enum ruby_basic_operators
-opt_str_lit_recv_bop(ID mid)
+static enum ruby_optimized_method
+opt_str_lit_recv_om(ID mid)
{
switch (mid) {
- case idEq: return BOP_EQ;
- case idNeq: return BOP_NEQ;
- case idPLUS: return BOP_PLUS;
- case idMULT: return BOP_MULT;
- case idMOD: return BOP_MOD;
- case idEqq: return BOP_EQQ;
+ case idEq: return OM_idEq__String;
+ case idNeq: return OM_idNeq__String;
+ case idPLUS: return OM_idPLUS__String;
+ case idMULT: return OM_idMULT__String;
+ case idMOD: return OM_idMOD__String;
+ case idEqq: return OM_idEqq__String;
}
- return BOP_LAST_;
+ return OM_LAST_;
}
/**
@@ -4452,7 +4446,7 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
#endif
/* receiver */
if (type == NODE_CALL) {
- enum ruby_basic_operators bop;
+ enum ruby_optimized_method om;
/*
* optimize:
* "yoda" == other -> opt_str_lit("yoda").send(:==, other)
@@ -4462,15 +4456,14 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
* "fmt" % args -> opt_str_lit("str").send(:%, other)
*/
if (iseq->compile_data->option->peephole_optimization &&
- ((bop = opt_str_lit_recv_bop(mid)) != BOP_LAST_) &&
+ ((om = opt_str_lit_recv_om(mid)) != OM_LAST_) &&
!private_recv_p(node) &&
node->nd_recv && nd_type(node->nd_recv) == NODE_STR &&
node->nd_args && nd_type(node->nd_args) == NODE_ARRAY &&
node->nd_args->nd_alen == 1)
{
VALUE yoda = rb_fstring(node->nd_recv->nd_lit);
- VALUE recv_info = new_recvinfo_for_put(iseq, yoda,
- bop, STRING_REDEFINED_OP_FLAG);
+ VALUE recv_info = new_recvinfo_for_put_(iseq, yoda, om);
node->nd_recv->nd_lit = yoda;
ADD_INSN1(recv, line, opt_str_lit, recv_info);
@@ -5376,9 +5369,7 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
nd_type(node->nd_args->nd_head) == NODE_STR)
{
VALUE str = rb_fstring(node->nd_args->nd_head->nd_lit);
- VALUE recv_info = new_recvinfo_for_arg(iseq, str,
- BOP_ASET, HASH_REDEFINED_OP_FLAG,
- rb_cHash, 0);
+ VALUE recv_info = new_recvinfo_for_arg(iseq, str, idASET, Hash, 0);
node->nd_args->nd_head->nd_lit = str;
if (!poped) {
diff --git a/defs/opt_method.def b/defs/opt_method.def
new file mode 100644
index 0000000..acc5e6b
--- /dev/null
+++ b/defs/opt_method.def
@@ -0,0 +1,49 @@
+# byte align the bitmap for now, maybe some arches do better with long or int
+# we may also use a larger size (in the unlikely case) we need more than
+# 7 optimized classes per mid. Currently this caps us to 256 optimized
+# (mid, klass) combinations (tested with OM_SHIFT=4, giving us 64K)
+OM_SHIFT = 3
+OM_ALIGN = 1 << OM_SHIFT
+OM_ALIGN_MASK = ~(OM_ALIGN - 1)
+OPT_METHODS = [
+ %w(idPLUS Fixnum Float String Array),
+ %w(idMINUS Fixnum Float),
+ %w(idMULT Fixnum Float String),
+ %w(idDIV Fixnum Float),
+ %w(idMOD Fixnum Float String),
+ %w(idEq Fixnum Float String),
+ %w(idNeq Fixnum Float String),
+ # id, mask classes
+ [ 'idEqq', %w(Bignum Fixnum Float Symbol), *%w(String) ],
+ %w(idLT Fixnum Float),
+ %w(idLE Fixnum Float),
+ %w(idGT Fixnum Float),
+ %w(idGE Fixnum Float),
+ %w(idLTLT String Array),
+ %w(idAREF Array Hash),
+ %w(idASET Array Hash),
+ %w(idLength Array String Hash),
+ %w(idSize Array String Hash),
+ %w(idEmptyP Array String Hash),
+ %w(idSucc Fixnum String Time),
+ %w(idEqTilde Regexp String),
+ %w(idFreeze String),
+]
+
+# for checking optimized classes,
+# speeds up method definitions of non-core classes
+def opt_classes
+ rv = {}
+ OPT_METHODS.each do |(_, *classes)|
+ classes.flatten.each { |c| rv[c] = true }
+ end
+ rv
+end
+
+def om(mid, klass)
+ if Array === klass
+ "OM_#{mid}__#{klass.join('_')}"
+ else
+ "OM_#{mid}__#{klass}"
+ end
+end
diff --git a/insns.def b/insns.def
index f6740f8..e304338 100644
--- a/insns.def
+++ b/insns.def
@@ -367,36 +367,34 @@ opt_str_lit
/*
* recv_info:
* 0 - str
- * 1 - basic operator flag (BOP_*)
- * 2 - redefined flag (*_REDEFINED_OP_FLAG)
+ * 1 - optimized method flag (OM_*)
* optional:
- * 3 - Class (optimized receiver class) or Symbol (method name)
- * 4 - stack offset (Fixint), only present if [3] is a Class,
+ * 2 - Class (optimized receiver class) or Symbol (method name)
+ * 3 - stack offset (Fixnum), only present if [2] is a Class,
* -1 stack offset means receiver is the frozen string literal itself
*/
const VALUE *ri = RARRAY_CONST_PTR(recv_info);
long len = RARRAY_LEN(recv_info);
- enum ruby_basic_operators bop = FIX2INT(ri[1]);
- int redef_flag = FIX2INT(ri[2]);
+ enum ruby_optimized_method om = FIX2INT(ri[1]);
val = ri[0]; /* hopefully, this is the only val assignment we need */
- if (len > 3) {
- VALUE msym_or_class = ri[3];
+ if (len > 2) {
+ VALUE msym_or_class = ri[2];
/* check if the receiver is an on-stack object: */
if (!SYMBOL_P(msym_or_class)) {
- int n = FIX2INT(ri[4]);
+ int n = FIX2INT(ri[3]);
VALUE recv = n < 0 ? val : TOPN(n);
if (SPECIAL_CONST_P(recv) ||
RBASIC_CLASS(recv) != msym_or_class ||
- !BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+ !rb_basic_op_unredefined_p(om)) {
/* bad, somebody redefined an optimized method, slow path: */
val = rb_str_resurrect(val);
}
}
else { /* receiver is the string literal itself (e.g. "str".freeze) */
- if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+ if (!rb_basic_op_unredefined_p(om)) {
/* bad, somebody redefined an optimized method, slow path: */
val = rb_str_resurrect(val);
val = rb_funcall(val, SYM2ID(msym_or_class), 0);
@@ -404,7 +402,7 @@ opt_str_lit
}
}
else { /* string lit is receiver, but there are args */
- if (!BASIC_OP_UNREDEFINED_P(bop, redef_flag)) {
+ if (!rb_basic_op_unredefined_p(om)) {
/* bad, somebody redefined an optimized method, slow path: */
val = rb_str_resurrect(val);
}
@@ -1328,11 +1326,7 @@ opt_case_dispatch
case T_FIXNUM:
case T_BIGNUM:
case T_STRING:
- if (BASIC_OP_UNREDEFINED_P(BOP_EQQ,
- SYMBOL_REDEFINED_OP_FLAG |
- FIXNUM_REDEFINED_OP_FLAG |
- BIGNUM_REDEFINED_OP_FLAG |
- STRING_REDEFINED_OP_FLAG)) {
+ if (rb_basic_mask_unredefined_p(OM_idEqq__Bignum_Fixnum_Float_Symbol)) {
st_data_t val;
if (st_lookup(RHASH_TBL_RAW(hash), key, &val)) {
JUMP(FIX2INT((VALUE)val));
@@ -1360,8 +1354,7 @@ opt_plus
(VALUE recv, VALUE obj)
(VALUE val)
{
- if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_PLUS,FIXNUM_REDEFINED_OP_FLAG)) {
+ if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idPLUS, Fixnum)) {
/* fixnum + fixnum */
#ifndef LONG_LONG_VALUE
val = (recv + (obj & (~1)));
@@ -1384,20 +1377,20 @@ opt_plus
#endif
}
else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idPLUS, Float)) {
val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
}
else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
- BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idPLUS, Float)) {
val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
}
else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString &&
- BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idPLUS, String)) {
val = rb_str_plus(recv, obj);
}
else if (RBASIC_CLASS(recv) == rb_cArray &&
- BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idPLUS, Array)) {
val = rb_ary_plus(recv, obj);
}
else {
@@ -1424,7 +1417,7 @@ opt_minus
(VALUE val)
{
if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_MINUS, FIXNUM_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idMINUS, Fixnum)) {
long a, b, c;
a = FIX2LONG(recv);
@@ -1439,12 +1432,12 @@ opt_minus
}
}
else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idMINUS, Float)) {
val = DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj));
}
else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
- BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idMINUS, Float)) {
val = DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj));
}
else {
@@ -1472,7 +1465,7 @@ opt_mult
(VALUE val)
{
if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_MULT, FIXNUM_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idMULT, Fixnum)) {
long a, b;
a = FIX2LONG(recv);
@@ -1489,13 +1482,12 @@ opt_mult
}
}
}
- else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) {
+ else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMULT, Float)) {
val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj));
}
else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
- BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idMULT, Float)) {
val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj));
}
else {
@@ -1521,8 +1513,7 @@ opt_div
(VALUE recv, VALUE obj)
(VALUE val)
{
- if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_DIV, FIXNUM_REDEFINED_OP_FLAG)) {
+ if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idDIV, Fixnum)) {
long x, y, div;
x = FIX2LONG(recv);
@@ -1552,13 +1543,12 @@ opt_div
}
val = LONG2NUM(div);
}
- else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) {
+ else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idDIV, Float)) {
val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj));
}
else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
- BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idDIV, Float)) {
val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj));
}
else {
@@ -1584,8 +1574,7 @@ opt_mod
(VALUE recv, VALUE obj)
(VALUE val)
{
- if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_MOD, FIXNUM_REDEFINED_OP_FLAG )) {
+ if (FIXNUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMOD, Fixnum )) {
long x, y;
x = FIX2LONG(recv);
@@ -1619,13 +1608,12 @@ opt_mod
val = LONG2FIX(mod);
}
}
- else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) {
+ else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idMOD, Float)) {
val = DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)));
}
else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
- BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idMOD, Float)) {
val = DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)));
}
else {
@@ -1704,7 +1692,7 @@ opt_lt
(VALUE val)
{
if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_LT, FIXNUM_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idLT, Fixnum)) {
SIGNED_VALUE a = recv, b = obj;
if (a < b) {
@@ -1715,13 +1703,13 @@ opt_lt
}
}
else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idLT, Float)) {
/* flonum is not NaN */
val = RFLOAT_VALUE(recv) < RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
}
else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
- BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idLT, Float)) {
val = double_cmp_lt(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
}
else {
@@ -1748,7 +1736,7 @@ opt_le
(VALUE val)
{
if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_LE, FIXNUM_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idLE, Fixnum)) {
SIGNED_VALUE a = recv, b = obj;
if (a <= b) {
@@ -1759,7 +1747,7 @@ opt_le
}
}
else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_LE, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idLE, Float)) {
/* flonum is not NaN */
val = RFLOAT_VALUE(recv) <= RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
}
@@ -1783,7 +1771,7 @@ opt_gt
(VALUE val)
{
if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_GT, FIXNUM_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idGT, Fixnum)) {
SIGNED_VALUE a = recv, b = obj;
if (a > b) {
@@ -1794,13 +1782,13 @@ opt_gt
}
}
else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idGT, Float)) {
/* flonum is not NaN */
val = RFLOAT_VALUE(recv) > RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
}
else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
- BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idGT, Float)) {
val = double_cmp_gt(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
}
else {
@@ -1827,7 +1815,7 @@ opt_ge
(VALUE val)
{
if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_GE, FIXNUM_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idGE, Fixnum)) {
SIGNED_VALUE a = recv, b = obj;
if (a >= b) {
@@ -1837,8 +1825,7 @@ opt_ge
val = Qfalse;
}
}
- else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_GE, FLOAT_REDEFINED_OP_FLAG)) {
+ else if (FLONUM_2_P(recv, obj) && BASIC_OP_UNREDEFINED_P(idGE, Float)) {
/* flonum is not NaN */
val = RFLOAT_VALUE(recv) >= RFLOAT_VALUE(obj) ? Qtrue : Qfalse;
}
@@ -1862,11 +1849,11 @@ opt_ltlt
{
if (!SPECIAL_CONST_P(recv)) {
if (RBASIC_CLASS(recv) == rb_cString &&
- BASIC_OP_UNREDEFINED_P(BOP_LTLT, STRING_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idLTLT, String)) {
val = rb_str_concat(recv, obj);
}
else if (RBASIC_CLASS(recv) == rb_cArray &&
- BASIC_OP_UNREDEFINED_P(BOP_LTLT, ARRAY_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idLTLT, Array)) {
val = rb_ary_push(recv, obj);
}
else {
@@ -1893,10 +1880,10 @@ opt_aref
(VALUE val)
{
if (!SPECIAL_CONST_P(recv)) {
- if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(BOP_AREF, ARRAY_REDEFINED_OP_FLAG) && FIXNUM_P(obj)) {
+ if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(idAREF, Array) && FIXNUM_P(obj)) {
val = rb_ary_entry(recv, FIX2LONG(obj));
}
- else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
+ else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(idAREF, Hash)) {
val = rb_hash_aref(recv, obj);
}
else {
@@ -1923,11 +1910,11 @@ opt_aset
(VALUE val)
{
if (!SPECIAL_CONST_P(recv)) {
- if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(BOP_ASET, ARRAY_REDEFINED_OP_FLAG) && FIXNUM_P(obj)) {
+ if (RBASIC_CLASS(recv) == rb_cArray && BASIC_OP_UNREDEFINED_P(idASET, Array) && FIXNUM_P(obj)) {
rb_ary_store(recv, FIX2LONG(obj), set);
val = set;
}
- else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
+ else if (RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(idASET, Hash)) {
rb_hash_aset(recv, obj, set);
val = set;
}
@@ -1957,15 +1944,15 @@ opt_length
{
if (!SPECIAL_CONST_P(recv)) {
if (RBASIC_CLASS(recv) == rb_cString &&
- BASIC_OP_UNREDEFINED_P(BOP_LENGTH, STRING_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idLength, String)) {
val = rb_str_length(recv);
}
else if (RBASIC_CLASS(recv) == rb_cArray &&
- BASIC_OP_UNREDEFINED_P(BOP_LENGTH, ARRAY_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idLength, Array)) {
val = LONG2NUM(RARRAY_LEN(recv));
}
else if (RBASIC_CLASS(recv) == rb_cHash &&
- BASIC_OP_UNREDEFINED_P(BOP_LENGTH, HASH_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idLength, Hash)) {
val = INT2FIX(RHASH_SIZE(recv));
}
else {
@@ -1992,15 +1979,15 @@ opt_size
{
if (!SPECIAL_CONST_P(recv)) {
if (RBASIC_CLASS(recv) == rb_cString &&
- BASIC_OP_UNREDEFINED_P(BOP_SIZE, STRING_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idSize, String)) {
val = rb_str_length(recv);
}
else if (RBASIC_CLASS(recv) == rb_cArray &&
- BASIC_OP_UNREDEFINED_P(BOP_SIZE, ARRAY_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idSize, Array)) {
val = LONG2NUM(RARRAY_LEN(recv));
}
else if (RBASIC_CLASS(recv) == rb_cHash &&
- BASIC_OP_UNREDEFINED_P(BOP_SIZE, HASH_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idSize, Hash)) {
val = INT2FIX(RHASH_SIZE(recv));
}
else {
@@ -2027,17 +2014,17 @@ opt_empty_p
{
if (!SPECIAL_CONST_P(recv)) {
if (RBASIC_CLASS(recv) == rb_cString &&
- BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, STRING_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idEmptyP, String)) {
if (RSTRING_LEN(recv) == 0) val = Qtrue;
else val = Qfalse;
}
else if (RBASIC_CLASS(recv) == rb_cArray &&
- BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, ARRAY_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idEmptyP, Array)) {
if (RARRAY_LEN(recv) == 0) val = Qtrue;
else val = Qfalse;
}
else if (RBASIC_CLASS(recv) == rb_cHash &&
- BASIC_OP_UNREDEFINED_P(BOP_EMPTY_P, HASH_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idEmptyP, Hash)) {
if (RHASH_EMPTY_P(recv)) val = Qtrue;
else val = Qfalse;
}
@@ -2065,7 +2052,7 @@ opt_succ
{
if (SPECIAL_CONST_P(recv)) {
if (FIXNUM_P(recv) &&
- BASIC_OP_UNREDEFINED_P(BOP_SUCC, FIXNUM_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idSucc, Fixnum)) {
const VALUE obj = INT2FIX(1);
/* fixnum + INT2FIX(1) */
val = (recv + (obj & (~1)));
@@ -2080,11 +2067,11 @@ opt_succ
}
else {
if (RBASIC_CLASS(recv) == rb_cString &&
- BASIC_OP_UNREDEFINED_P(BOP_SUCC, STRING_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idSucc, String)) {
val = rb_str_succ(recv);
}
else if (RBASIC_CLASS(recv) == rb_cTime &&
- BASIC_OP_UNREDEFINED_P(BOP_SUCC, TIME_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idSucc, Time)) {
val = rb_time_succ(recv);
}
else
@@ -2134,7 +2121,7 @@ opt_regexpmatch1
(VALUE obj)
(VALUE val)
{
- if (BASIC_OP_UNREDEFINED_P(BOP_MATCH, REGEXP_REDEFINED_OP_FLAG)) {
+ if (BASIC_OP_UNREDEFINED_P(idEqTilde, Regexp)) {
val = rb_reg_match(r, obj);
}
else {
@@ -2154,7 +2141,7 @@ opt_regexpmatch2
(VALUE val)
{
if (CLASS_OF(obj2) == rb_cString &&
- BASIC_OP_UNREDEFINED_P(BOP_MATCH, STRING_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idEqTilde, String)) {
val = rb_reg_match(obj1, obj2);
}
else {
diff --git a/template/opt_method.h.tmpl b/template/opt_method.h.tmpl
new file mode 100644
index 0000000..39c4043
--- /dev/null
+++ b/template/opt_method.h.tmpl
@@ -0,0 +1,71 @@
+/* DO NOT EDIT THIS FILE DIRECTLY: edit template/opt_method.h.tmpl instead */
+#ifndef RUBY_OPT_METHOD_H
+#define RUBY_OPT_METHOD_H
+<%
+defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def")
+eval(File.read(defs), binding, defs)
+%>
+typedef uint<%= OM_ALIGN %>_t rb_om_bitmap_t;
+
+enum ruby_optimized_method {
+<%
+opt_masks = {}
+n = 0
+OPT_METHODS.each do |(mid, *classes)|
+ classes.each do |klass|
+ if Array === klass
+ opt_masks[mid] = klass.dup
+ # we will align these in the second loop, below
+ next
+ end %>
+ <%= om(mid, klass) %> = <%= n += 1 %>,
+<%
+ end # classes.each
+end # OPT_METHODS.each
+
+# align multi-class bits so a single AND operation may
+# be byte-aligned and used to check an mid for up to 7 classes at once:
+opt_masks.each do |mid, classes|
+ # round up n to the next aligned byte slot
+ n = (n + OM_ALIGN) & OM_ALIGN_MASK
+
+ classes.each do |k|
+%>
+ <%= om(mid, k) %> = <%= n += 1 %>,
+<%=
+# we need this macro to generate shifts for the masks enums below:
+"#define #{om(mid, k)} (#{n})"
+%>
+<%
+ end # classes.each
+end # opt_masks.each
+if n >= ((1 << OM_ALIGN) - 1)
+ raise "OM_ALIGN needs to be raised to support more optimized methods"
+end
+%>
+ OM_LAST_ = <%= om_last = (n += 1) %>, /* for bitmap sizing */
+ /* special mask values below */
+<%
+# generate mask enums
+opt_masks.each do |mid, c|
+ # n.b.: negate masks to simplify the rb_opt_method_is_mask check:
+%>
+ <%= om(mid, c) %> = -(<%=
+ # pack into 16 bits so it may be a negative Fixnum
+ # 1) 8 bit offset
+ # 2) OM_ALIGN bits mask (8 or 16)
+ sep = "|\n "
+ "/* offset: */ ((#{om(mid, c[0])} / #{OM_ALIGN}) << #{OM_ALIGN}) " \
+ "#{sep} /* mask: */ (" +
+ c.map { |k| "(1U << (#{om(mid, k)} % #{OM_ALIGN}))" }.join(sep) + # mask
+ ')'
+ %>),
+<%
+end # opt_masks.each
+%>
+ OM_ALIGN_ = <%= OM_ALIGN %>,
+ OM_SIZE_ = <%= ((om_last + OM_ALIGN) & OM_ALIGN_MASK) / OM_ALIGN %>,
+ OM_GETMASK_ = (1 << OM_ALIGN_) - 1
+};
+
+#endif /* RUBY_OPT_METHOD_H */
diff --git a/template/opt_method.inc.tmpl b/template/opt_method.inc.tmpl
new file mode 100644
index 0000000..0501121
--- /dev/null
+++ b/template/opt_method.inc.tmpl
@@ -0,0 +1,49 @@
+/* DO NOT EDIT THIS FILE DIRECTLY: edit template/opt_method.inc.tmpl instead */
+<%
+defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def")
+eval(File.read(defs), binding, defs)
+%>
+
+static void
+add_opt_method(st_table *tbl, VALUE klass, ID mid,
+ enum ruby_optimized_method om)
+{
+ rb_method_entry_t *me = rb_method_entry_at(klass, mid);
+
+ if (me && me->def && me->def->type == VM_METHOD_TYPE_CFUNC) {
+ st_insert(tbl, (st_data_t)me, (st_data_t)om);
+ }
+ else if (mid != idNeq) {
+ rb_bug("undefined optimized method: %s", rb_id2name(mid));
+ }
+}
+
+static void
+vm_init_redefined_flags(void *tbl)
+{
+<%
+OPT_METHODS.each do |(mid, *classes)|
+ classes.each do |klass|
+ if Array === klass
+ klass.each do |k|
+%>
+ add_opt_method(tbl, rb_c<%= k %>, <%= mid %>, <%= om(mid, k) %>);
+<%
+ end # klass.each
+ else
+%>
+ add_opt_method(tbl, rb_c<%= klass %>, <%= mid %>, <%= om(mid, klass) %>);
+<% end # !(Array === klass)
+ end # classes.each
+end # OPT_METHODS.each
+%>
+}
+
+static int
+vm_redefinition_check_flag(VALUE klass)
+{
+<% opt_classes.each_key do |klass| %>
+ if (klass == rb_c<%= klass %>) return 1;
+<% end %>
+ return 0;
+}
diff --git a/vm.c b/vm.c
index cc88926..35c4120 100644
--- a/vm.c
+++ b/vm.c
@@ -20,6 +20,7 @@
#include "eval_intern.h"
#include "probes.h"
#include "probes_helper.h"
+#include "opt_method.inc"
static inline VALUE *
VM_EP_LEP(VALUE *ep)
@@ -1134,30 +1135,16 @@ rb_iter_break_value(VALUE val)
static st_table *vm_opt_method_table = 0;
-static int
-vm_redefinition_check_flag(VALUE klass)
-{
- if (klass == rb_cFixnum) return FIXNUM_REDEFINED_OP_FLAG;
- if (klass == rb_cFloat) return FLOAT_REDEFINED_OP_FLAG;
- if (klass == rb_cString) return STRING_REDEFINED_OP_FLAG;
- if (klass == rb_cArray) return ARRAY_REDEFINED_OP_FLAG;
- if (klass == rb_cHash) return HASH_REDEFINED_OP_FLAG;
- if (klass == rb_cBignum) return BIGNUM_REDEFINED_OP_FLAG;
- if (klass == rb_cSymbol) return SYMBOL_REDEFINED_OP_FLAG;
- if (klass == rb_cTime) return TIME_REDEFINED_OP_FLAG;
- if (klass == rb_cRegexp) return REGEXP_REDEFINED_OP_FLAG;
- return 0;
-}
-
static void
rb_vm_check_redefinition_opt_method(const rb_method_entry_t *me, VALUE klass)
{
- st_data_t bop;
+ st_data_t om;
if (!me->def || me->def->type == VM_METHOD_TYPE_CFUNC) {
- if (st_lookup(vm_opt_method_table, (st_data_t)me, &bop)) {
- int flag = vm_redefinition_check_flag(klass);
+ if (st_lookup(vm_opt_method_table, (st_data_t)me, &om)) {
+ unsigned int i = om / OM_ALIGN_;
+ rb_om_bitmap_t mask = (rb_om_bitmap_t)(1U << (om % OM_ALIGN_));
- ruby_vm_redefined_flag[bop] |= flag;
+ ruby_vm_redefined_flag[i] |= mask;
}
}
}
@@ -1184,51 +1171,11 @@ rb_vm_check_redefinition_by_prepend(VALUE klass)
}
static void
-add_opt_method(VALUE klass, ID mid, VALUE bop)
-{
- rb_method_entry_t *me = rb_method_entry_at(klass, mid);
-
- if (me && me->def &&
- me->def->type == VM_METHOD_TYPE_CFUNC) {
- st_insert(vm_opt_method_table, (st_data_t)me, (st_data_t)bop);
- }
- else {
- rb_bug("undefined optimized method: %s", rb_id2name(mid));
- }
-}
-
-static void
vm_init_redefined_flag(void)
{
- ID mid;
- VALUE bop;
-
vm_opt_method_table = st_init_numtable();
-#define OP(mid_, bop_) (mid = id##mid_, bop = BOP_##bop_, ruby_vm_redefined_flag[bop] = 0)
-#define C(k) add_opt_method(rb_c##k, mid, bop)
- OP(PLUS, PLUS), (C(Fixnum), C(Float), C(String), C(Array));
- OP(MINUS, MINUS), (C(Fixnum), C(Float));
- OP(MULT, MULT), (C(Fixnum), C(Float));
- OP(DIV, DIV), (C(Fixnum), C(Float));
- OP(MOD, MOD), (C(Fixnum), C(Float));
- OP(Eq, EQ), (C(Fixnum), C(Float), C(String));
- OP(Eqq, EQQ), (C(Fixnum), C(Bignum), C(Float), C(Symbol), C(String));
- OP(LT, LT), (C(Fixnum), C(Float));
- OP(LE, LE), (C(Fixnum), C(Float));
- OP(GT, GT), (C(Fixnum), C(Float));
- OP(GE, GE), (C(Fixnum), C(Float));
- OP(LTLT, LTLT), (C(String), C(Array));
- OP(AREF, AREF), (C(Array), C(Hash));
- OP(ASET, ASET), (C(Array), C(Hash));
- OP(Length, LENGTH), (C(Array), C(String), C(Hash));
- OP(Size, SIZE), (C(Array), C(String), C(Hash));
- OP(EmptyP, EMPTY_P), (C(Array), C(String), C(Hash));
- OP(Succ, SUCC), (C(Fixnum), C(String), C(Time));
- OP(EqTilde, MATCH), (C(Regexp), C(String));
- OP(Freeze, FREEZE), (C(String));
-#undef C
-#undef OP
+ vm_init_redefined_flags(vm_opt_method_table); /* opt_method.inc.tmpl */
}
/* for vm development */
diff --git a/vm_core.h b/vm_core.h
index 9f0f053..c0f7454 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -24,6 +24,7 @@
#include "method.h"
#include "ruby_atomic.h"
#include "ccan/list/list.h"
+#include "opt_method.h"
#include "ruby/thread_native.h"
#if defined(_WIN32)
@@ -320,33 +321,6 @@ enum ruby_special_exceptions {
ruby_special_error_count
};
-enum ruby_basic_operators {
- BOP_PLUS,
- BOP_MINUS,
- BOP_MULT,
- BOP_DIV,
- BOP_MOD,
- BOP_EQ,
- BOP_EQQ,
- BOP_LT,
- BOP_LE,
- BOP_LTLT,
- BOP_AREF,
- BOP_ASET,
- BOP_LENGTH,
- BOP_SIZE,
- BOP_EMPTY_P,
- BOP_SUCC,
- BOP_GT,
- BOP_GE,
- BOP_NOT,
- BOP_NEQ,
- BOP_MATCH,
- BOP_FREEZE,
-
- BOP_LAST_
-};
-
#define GetVMPtr(obj, ptr) \
GetCoreDataFromValue((obj), rb_vm_t, (ptr))
@@ -441,7 +415,7 @@ typedef struct rb_vm_struct {
size_t fiber_machine_stack_size;
} default_params;
- short redefined_flag[BOP_LAST_];
+ rb_om_bitmap_t redefined_flag[OM_SIZE_];
} rb_vm_t;
/* default values */
@@ -458,18 +432,8 @@ typedef struct rb_vm_struct {
#define RUBY_VM_FIBER_MACHINE_STACK_SIZE ( 64 * 1024 * sizeof(VALUE)) /* 256 KB or 512 KB */
#define RUBY_VM_FIBER_MACHINE_STACK_SIZE_MIN ( 16 * 1024 * sizeof(VALUE)) /* 64 KB or 128 KB */
-/* optimize insn */
-#define FIXNUM_REDEFINED_OP_FLAG (1 << 0)
-#define FLOAT_REDEFINED_OP_FLAG (1 << 1)
-#define STRING_REDEFINED_OP_FLAG (1 << 2)
-#define ARRAY_REDEFINED_OP_FLAG (1 << 3)
-#define HASH_REDEFINED_OP_FLAG (1 << 4)
-#define BIGNUM_REDEFINED_OP_FLAG (1 << 5)
-#define SYMBOL_REDEFINED_OP_FLAG (1 << 6)
-#define TIME_REDEFINED_OP_FLAG (1 << 7)
-#define REGEXP_REDEFINED_OP_FLAG (1 << 8)
-
-#define BASIC_OP_UNREDEFINED_P(op, klass) (LIKELY((GET_VM()->redefined_flag[(op)]&(klass)) == 0))
+#define BASIC_OP_UNREDEFINED_P(mid, klass) \
+ rb_basic_op_unredefined_p(OM_##mid##__##klass)
#ifndef VM_DEBUG_BP_CHECK
#define VM_DEBUG_BP_CHECK 0
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 05ed3c6..2aedb46 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -872,17 +872,17 @@ VALUE
opt_eq_func(VALUE recv, VALUE obj, CALL_INFO ci)
{
if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_EQ, FIXNUM_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idEq, Fixnum)) {
return (recv == obj) ? Qtrue : Qfalse;
}
else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_EQ, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idEq, Float)) {
return (recv == obj) ? Qtrue : Qfalse;
}
else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
if (RBASIC_CLASS(recv) == rb_cFloat &&
RBASIC_CLASS(obj) == rb_cFloat &&
- BASIC_OP_UNREDEFINED_P(BOP_EQ, FLOAT_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idEq, Float)) {
double a = RFLOAT_VALUE(recv);
double b = RFLOAT_VALUE(obj);
@@ -893,7 +893,7 @@ opt_eq_func(VALUE recv, VALUE obj, CALL_INFO ci)
}
else if (RBASIC_CLASS(recv) == rb_cString &&
RBASIC_CLASS(obj) == rb_cString &&
- BASIC_OP_UNREDEFINED_P(BOP_EQ, STRING_REDEFINED_OP_FLAG)) {
+ BASIC_OP_UNREDEFINED_P(idEq, String)) {
return rb_str_equal(recv, obj);
}
}
diff --git a/vm_insnhelper.h b/vm_insnhelper.h
index 31f8ffc..a4290ee 100644
--- a/vm_insnhelper.h
+++ b/vm_insnhelper.h
@@ -229,5 +229,28 @@ enum vm_regan_acttype {
static VALUE make_no_method_exception(VALUE exc, const char *format,
VALUE obj, int argc, const VALUE *argv);
-
+static inline int
+rb_basic_op_unredefined_p(enum ruby_optimized_method om)
+{
+ unsigned int i = om / OM_ALIGN_;
+ rb_om_bitmap_t mask = (rb_om_bitmap_t)(1U << (om % OM_ALIGN_));
+
+ return LIKELY((GET_VM()->redefined_flag[i] & mask) == 0);
+}
+
+static inline int
+rb_basic_mask_unredefined_p(enum ruby_optimized_method om)
+{
+ unsigned int uom = (unsigned int)-om;
+ unsigned int offset = 0xffU & (uom >> OM_ALIGN_);
+ rb_om_bitmap_t mask = (rb_om_bitmap_t)(OM_GETMASK_ & uom);
+
+ return LIKELY((GET_VM()->redefined_flag[offset] & mask) == 0);
+}
+
+static inline int
+rb_opt_method_is_mask(enum ruby_optimized_method om)
+{
+ return !!((int)om < 0);
+}
#endif /* RUBY_INSNHELPER_H */
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 07/18] fix mismerge
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (4 preceding siblings ...)
2014-10-18 2:41 ` [PATCH 06/18] vm: automatically define optimized method enums Eric Wong
@ 2014-10-18 2:41 ` Eric Wong
2014-10-18 2:42 ` [PATCH 08/18] optimize string allocations for sub/gsub/tr/tr_s(!) Eric Wong
` (10 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:41 UTC (permalink / raw)
To: spew
---
test/ruby/test_string.rb | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index a2abe40..f27734b 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2308,13 +2308,6 @@ class TestString < Test::Unit::TestCase
assert_equal before + nr, GC.stat(:total_allocated_objects)
end
end
-end
-
-class TestString2 < TestString
- def initialize(*args)
- super
- @cls = S2
- end
def test_opt_str_lit
assert_separately([], <<-RUBY)
@@ -2398,7 +2391,7 @@ class TestString2 < TestString
before = GC.stat(:total_allocated_objects)
nr.times { res << (recv == "constant") } # opt_streq1
nr.times { res << ("constant" == recv) } # opt_streq2
- nr.times { res << ("something " != recv) } # 1st pass peephole
+ nr.times { res << ("something" != recv) } # 1st pass peephole
nr.times { res << ("constant" == recv) } # opt_streq2
nr.times { res << ("constant" === recv) } # opt_streqq2
nr.times { res << (recv != "something") } # 2nd pass peephole
@@ -2441,3 +2434,10 @@ class TestString2 < TestString
end
end
end
+
+class TestString2 < TestString
+ def initialize(*args)
+ super
+ @cls = S2
+ end
+end
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 08/18] optimize string allocations for sub/gsub/tr/tr_s(!)
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (5 preceding siblings ...)
2014-10-18 2:41 ` [PATCH 07/18] fix mismerge Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
2014-10-18 2:42 ` [PATCH 09/18] compile.c (opt_str_lit_1): hoist out of iseq_peephole_optimize Eric Wong
` (9 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
Improvements across the board (I haven't tested each method yet,
but the code changes are DRY enough for me to trust them).
-----------------------------------------------------------
raw data:
[["loop_whileloop2",
[[0.09299727622419596, 0.09182057436555624, 0.09192507807165384],
[0.09175324533134699, 0.0917150853201747, 0.09169120714068413]]],
["vm2_gsub_bang_lit",
[[1.28092535212636, 1.212415685877204, 1.2110721664503217],
[0.6577691622078419, 0.6574153359979391, 0.657793253660202]]],
["vm2_gsub_bang_re",
[[1.5643755476921797, 1.5549067426472902, 1.5551356291398406],
[1.2313632098957896, 1.2304210113361478, 1.2378935469314456]]],
["vm2_gsub_re",
[[1.7676676837727427, 1.8015323374420404, 1.7688637850806117],
[1.54728907905519, 1.530972053296864, 1.5956763122230768]]]]
Elapsed time: 24.618900503 (sec)
-----------------------------------------------------------
benchmark results:
minimum results in each 3 measurements.
Execution time (sec)
name trunk built
loop_whileloop2 0.092 0.092
vm2_gsub_bang_lit* 1.119 0.566
vm2_gsub_bang_re* 1.463 1.139
vm2_gsub_re* 1.676 1.439
Speedup ratio: compare with the result of `trunk' (greater is better)
name built
loop_whileloop2 1.001
vm2_gsub_bang_lit* 1.978
vm2_gsub_bang_re* 1.285
vm2_gsub_re* 1.164
-----------------------------------------------------------
raw data:
[["loop_whileloop2",
[[0.10830817837268114, 0.09708951227366924, 0.0952271893620491],
[0.09657043684273958, 0.09450766257941723, 0.0936131589114666]]],
["vm2_tr_bang",
[[2.8116708220914006, 2.6677203606814146, 2.7375484481453896],
[1.6796088377013803, 1.6209765998646617, 1.7005833145231009]]]]
Elapsed time: 13.805532977 (sec)
-----------------------------------------------------------
benchmark results:
minimum results in each 3 measurements.
Execution time (sec)
name trunk built
loop_whileloop2 0.095 0.094
vm2_tr_bang* 2.572 1.527
Speedup ratio: compare with the result of `trunk' (greater is better)
name built
loop_whileloop2 1.017
vm2_tr_bang* 1.684
---
benchmark/bm_vm2_gsub_bang_lit.rb | 6 +++++
benchmark/bm_vm2_gsub_bang_re.rb | 6 +++++
benchmark/bm_vm2_gsub_re.rb | 6 +++++
benchmark/bm_vm2_tr_bang.rb | 7 ++++++
compile.c | 52 +++++++++++++++++++++++++++++++++++++++
defs/id.def | 9 +++++++
defs/opt_method.def | 8 ++++++
test/ruby/test_string.rb | 34 +++++++++++++++++++++++++
8 files changed, 128 insertions(+)
create mode 100644 benchmark/bm_vm2_gsub_bang_lit.rb
create mode 100644 benchmark/bm_vm2_gsub_bang_re.rb
create mode 100644 benchmark/bm_vm2_gsub_re.rb
create mode 100644 benchmark/bm_vm2_tr_bang.rb
diff --git a/benchmark/bm_vm2_gsub_bang_lit.rb b/benchmark/bm_vm2_gsub_bang_lit.rb
new file mode 100644
index 0000000..9251fb1
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_bang_lit.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ str.gsub!("nomatch", "")
+end
diff --git a/benchmark/bm_vm2_gsub_bang_re.rb b/benchmark/bm_vm2_gsub_bang_re.rb
new file mode 100644
index 0000000..e5fc9ea
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_bang_re.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ str.gsub!(/a/, "")
+end
diff --git a/benchmark/bm_vm2_gsub_re.rb b/benchmark/bm_vm2_gsub_re.rb
new file mode 100644
index 0000000..606f247
--- /dev/null
+++ b/benchmark/bm_vm2_gsub_re.rb
@@ -0,0 +1,6 @@
+i = 0
+str = ""
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ str.gsub(/a/, "")
+end
diff --git a/benchmark/bm_vm2_tr_bang.rb b/benchmark/bm_vm2_tr_bang.rb
new file mode 100644
index 0000000..8065a65
--- /dev/null
+++ b/benchmark/bm_vm2_tr_bang.rb
@@ -0,0 +1,7 @@
+i = 0
+str = "a"
+while i<6_000_000 # benchmark loop 2
+ i += 1
+ str.tr!("a", "A")
+ str.tr!("A", "a")
+end
diff --git a/compile.c b/compile.c
index 7d6771b..205ff6a 100644
--- a/compile.c
+++ b/compile.c
@@ -1745,6 +1745,54 @@ new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str,
return ri;
}
+/*
+ * optimize common calls which take two string literals:
+ * foo.sub(/../, "to")
+ * foo.sub!(/../, "to")
+ * foo.gsub(/../, "to")
+ * foo.gsub!(/../, "to")
+ * foo.tr(/../, "to")
+ * foo.tr!(/../, "to")
+ * foo.tr_s(/../, "to")
+ * foo.tr_s!(/../, "to")
+ */
+static VALUE
+opt_str_lit_2(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
+{
+ INSN *piobj;
+ enum ruby_optimized_method om = OM_LAST_;
+
+ switch (ci->mid) {
+#define C(mid) case mid: om = OM_##mid##__String; break
+ C(idSub);
+ C(idSub_bang);
+ C(idGsub);
+ C(idGsub_bang);
+ C(idTr);
+ C(idTr_bang);
+ C(idTr_s);
+ C(idTr_s_bang);
+#undef C
+ default: return Qfalse;
+ }
+
+ /*
+ * previous arg may be a string literal, too:
+ * foo.gsub!("from", "to")
+ * foo.tr!("from", "to")
+ * ..
+ */
+ piobj = (INSN *)get_prev_insn(list);
+ if (piobj && piobj->insn_id == BIN(putstring)) {
+ VALUE pstr = piobj->operands[0];
+ VALUE pri = new_recvinfo_for_arg_(iseq, pstr, om, rb_cString, 0);
+ piobj->operands[0] = pri;
+ piobj->insn_id = BIN(opt_str_lit);
+ }
+
+ return new_recvinfo_for_arg_(iseq, str, om, rb_cString, 1);
+}
+
static int
iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcallopt)
{
@@ -1921,6 +1969,10 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
ri = new_recvinfo_for_arg(iseq, str, idEqq, String, 0);
break;
}
+ break;
+ case 2:
+ ri = opt_str_lit_2(iseq, str, ci, (INSN *)list);
+ break;
}
if (ri != Qfalse) {
iobj->insn_id = BIN(opt_str_lit);
diff --git a/defs/id.def b/defs/id.def
index f7fffbd..21aff93 100644
--- a/defs/id.def
+++ b/defs/id.def
@@ -57,6 +57,14 @@ firstline, predefined = __LINE__+1, %[\
core#hash_merge_ary
core#hash_merge_ptr
core#hash_merge_kwd
+ gsub
+ gsub!
+ sub
+ sub!
+ tr
+ tr!
+ tr_s
+ tr_s!
]
class KeywordError < RuntimeError
@@ -83,6 +91,7 @@ predefined.split(/^/).each_with_index do |line, num|
token = "_#{token.gsub(/\W+/, '_')}"
else
token = token.sub(/\?/, 'P').sub(/\A[a-z]/) {$&.upcase}
+ token.sub!(/!\z/, "_bang")
token.sub!(/\A\$/, "_G_")
token.sub!(/\A@@/, "_C_")
token.sub!(/\A@/, "_I_")
diff --git a/defs/opt_method.def b/defs/opt_method.def
index acc5e6b..e96cc9b 100644
--- a/defs/opt_method.def
+++ b/defs/opt_method.def
@@ -28,6 +28,14 @@ OPT_METHODS = [
%w(idSucc Fixnum String Time),
%w(idEqTilde Regexp String),
%w(idFreeze String),
+ %w(idGsub String),
+ %w(idGsub_bang String),
+ %w(idSub String),
+ %w(idSub_bang String),
+ %w(idTr String),
+ %w(idTr_bang String),
+ %w(idTr_s String),
+ %w(idTr_s_bang String),
]
# for checking optimized classes,
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index f27734b..21a9330 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2433,6 +2433,40 @@ class TestString < Test::Unit::TestCase
assert_equal [ "aaa" ], res.uniq!
end
end
+
+ def assert_no_new_allocations(mesg = "", adjust = 0)
+ before = GC.stat(:total_allocated_objects)
+ yield
+ after = GC.stat(:total_allocated_objects)
+ assert_equal before, after - adjust, mesg
+ end
+
+ def test_opt_str_lit_gsub
+ return if @cls != String
+ require_compile_option(:peephole_optimization)
+ foo = "foo"
+ re = /nomatch/
+ foo.gsub!(re, "00") # compile regexp
+ n = 3
+
+ assert_no_new_allocations("gsub var regexp") do
+ n.times { foo.gsub!(re, "00") }
+ end
+
+ # compiles re once:
+ assert_no_new_allocations("gsub lit regexp", 1) do
+ n.times { foo.gsub!(/nomatch/, "00") }
+ end
+
+ assert_no_new_allocations("gsub literal string") do
+ n.times { foo.gsub!("nomatch", "00") }
+ end
+
+ ary = [ [ re ] ]
+ assert_no_new_allocations("bigger stack") do
+ n.times { foo.gsub!(ary[0][0], "00") }
+ end
+ end
end
class TestString2 < TestString
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 09/18] compile.c (opt_str_lit_1): hoist out of iseq_peephole_optimize
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (6 preceding siblings ...)
2014-10-18 2:42 ` [PATCH 08/18] optimize string allocations for sub/gsub/tr/tr_s(!) Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
2014-10-18 2:42 ` [PATCH 10/18] test/ruby/test_string.rb: cleanup allocation tests Eric Wong
` (8 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
---
compile.c | 67 ++++++++++++++++++++++++++++++++++-----------------------------
1 file changed, 36 insertions(+), 31 deletions(-)
diff --git a/compile.c b/compile.c
index 205ff6a..f1123cc 100644
--- a/compile.c
+++ b/compile.c
@@ -1746,21 +1746,51 @@ new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str,
}
/*
+ * optimize allocation:
+ * hash["lit"] # hash lookups
+ * str == "lit"
+ * str != "lit"
+ * str << "lit"
+ * str + "lit"
+ * str === "lit"
+ */
+static VALUE
+opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
+{
+ enum ruby_optimized_method om;
+ VALUE c;
+
+ switch (ci->mid) {
+#define C(mid,klass) case mid: om = OM_##mid##__##klass; c = rb_c##klass; break
+ C(idAREF, Hash);
+ C(idEq, String);
+ C(idNeq, String);
+ C(idLTLT, String);
+ C(idPLUS, String);
+ C(idEqq, String);
+#undef C
+ default: return Qfalse;
+ }
+
+ return new_recvinfo_for_arg_(iseq, str, om, c, 0);
+}
+
+/*
* optimize common calls which take two string literals:
* foo.sub(/../, "to")
* foo.sub!(/../, "to")
* foo.gsub(/../, "to")
* foo.gsub!(/../, "to")
- * foo.tr(/../, "to")
- * foo.tr!(/../, "to")
- * foo.tr_s(/../, "to")
- * foo.tr_s!(/../, "to")
+ * foo.tr("from", "to")
+ * foo.tr!("from", "to")
+ * foo.tr_s("from", "to")
+ * foo.tr_s!("from", "to")
*/
static VALUE
opt_str_lit_2(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
{
INSN *piobj;
- enum ruby_optimized_method om = OM_LAST_;
+ enum ruby_optimized_method om;
switch (ci->mid) {
#define C(mid) case mid: om = OM_##mid##__String; break
@@ -1943,32 +1973,7 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
}
break;
case 1:
- switch (ci->mid) {
- case idAREF:
- /* optimize allocation: obj["lit"] */
- ri = new_recvinfo_for_arg(iseq, str, idAREF, Hash, 0);
- break;
- case idEq:
- /* optimize allocation: obj == "lit" */
- ri = new_recvinfo_for_arg(iseq, str, idEq, String, 0);
- break;
- case idNeq:
- /* optimize allocation: obj != "lit" */
- ri = new_recvinfo_for_arg(iseq, str, idNeq, String, 0);
- break;
- case idLTLT:
- /* optimize allocation: obj << "lit" */
- ri = new_recvinfo_for_arg(iseq, str, idLTLT, String, 0);
- break;
- case idPLUS:
- /* optimize allocation: obj + "lit" */
- ri = new_recvinfo_for_arg(iseq, str, idPLUS, String, 0);
- break;
- case idEqq:
- /* optimize allocation: obj === "lit" */
- ri = new_recvinfo_for_arg(iseq, str, idEqq, String, 0);
- break;
- }
+ ri = opt_str_lit_1(iseq, str, ci, (INSN *)list);
break;
case 2:
ri = opt_str_lit_2(iseq, str, ci, (INSN *)list);
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 10/18] test/ruby/test_string.rb: cleanup allocation tests
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (7 preceding siblings ...)
2014-10-18 2:42 ` [PATCH 09/18] compile.c (opt_str_lit_1): hoist out of iseq_peephole_optimize Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
2014-10-18 2:42 ` [PATCH 11/18] test_string: cleanup tests Eric Wong
` (7 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
---
test/ruby/test_string.rb | 76 ++++++++++++++++++++++++------------------------
1 file changed, 38 insertions(+), 38 deletions(-)
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 21a9330..bff55fd 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1912,9 +1912,9 @@ class TestString < Test::Unit::TestCase
def test_literal_freeze
require_compile_option(:peephole_optimization)
- before = GC.stat(:total_allocated_objects)
- 5.times { "".freeze }
- assert_equal before, GC.stat(:total_allocated_objects)
+ assert_no_new_allocations do
+ 5.times { "".freeze }
+ end
end
class S2 < String
@@ -2298,14 +2298,14 @@ class TestString < Test::Unit::TestCase
if @cls == String
nr = 10
recv = ""
- before = GC.stat(:total_allocated_objects)
- nr.times { recv << "constant" }
- assert_equal before, GC.stat(:total_allocated_objects)
+ assert_no_new_allocations do
+ nr.times { recv << "constant" }
+ end
assert_equal "constant" * nr, recv
- before = GC.stat(:total_allocated_objects)
- nr.times { "recv" << "constant" }
- assert_equal before + nr, GC.stat(:total_allocated_objects)
+ assert_no_new_allocations("'lit' << 'lit' (LTLT)", nr) do
+ nr.times { "recv" << "constant" }
+ end
end
end
@@ -2388,48 +2388,48 @@ class TestString < Test::Unit::TestCase
recv = "something"
res = []
- before = GC.stat(:total_allocated_objects)
- nr.times { res << (recv == "constant") } # opt_streq1
- nr.times { res << ("constant" == recv) } # opt_streq2
- nr.times { res << ("something" != recv) } # 1st pass peephole
- nr.times { res << ("constant" == recv) } # opt_streq2
- nr.times { res << ("constant" === recv) } # opt_streqq2
- nr.times { res << (recv != "something") } # 2nd pass peephole
- assert_equal before, GC.stat(:total_allocated_objects)
+ assert_no_new_allocations("false comparisons") do
+ nr.times { res << (recv == "constant") } # opt_streq1
+ nr.times { res << ("constant" == recv) } # opt_streq2
+ nr.times { res << ("something" != recv) } # 1st pass peephole
+ nr.times { res << ("constant" == recv) } # opt_streq2
+ nr.times { res << ("constant" === recv) } # opt_streqq2
+ nr.times { res << (recv != "something") } # 2nd pass peephole
+ end
assert_equal [ false ], res.uniq!
res.clear
- before = GC.stat(:total_allocated_objects)
- nr.times { res << (recv == "something") } # opt_streq1
- nr.times { res << ("something" == recv) } # opt_streq2
- nr.times { res << ("something" === recv) } # opt_streqq2
- nr.times { res << (recv === "something") } # opt_streqq2
- nr.times { res << ("constant" != recv) } # 1st pass peephole
- nr.times { res << (recv != "constant") } # 2nd pass peephole
- nr.times { res << ("a" != "b") } # 1st pass peephole
- nr.times { res << ("a" == "a") } # 1st pass peephole
- nr.times { res << ("".size == 0) } # 2nd pass peephole
- nr.times { res << ("".length == 0) } # 2nd pass peephole
- assert_equal before, GC.stat(:total_allocated_objects)
+ assert_no_new_allocations("true comparisons") do
+ nr.times { res << (recv == "something") } # opt_streq1
+ nr.times { res << ("something" == recv) } # opt_streq2
+ nr.times { res << ("something" === recv) } # opt_streqq2
+ nr.times { res << (recv === "something") } # opt_streqq2
+ nr.times { res << ("constant" != recv) } # 1st pass peephole
+ nr.times { res << (recv != "constant") } # 2nd pass peephole
+ nr.times { res << ("a" != "b") } # 1st pass peephole
+ nr.times { res << ("a" == "a") } # 1st pass peephole
+ nr.times { res << ("".size == 0) } # 2nd pass peephole
+ nr.times { res << ("".length == 0) } # 2nd pass peephole
+ end
assert_equal [ true ], res.uniq!
# :+ optimizations
res.clear
- before = GC.stat(:total_allocated_objects)
- nr.times { res << ("foo" + recv) }
- assert_equal before + nr, GC.stat(:total_allocated_objects)
+ assert_no_new_allocations("'str' + (PLUS)", nr) do
+ nr.times { res << ("foo" + recv) }
+ end
assert_equal [ "foosomething" ], res.uniq!
res.clear
- before = GC.stat(:total_allocated_objects)
- nr.times { res << (recv + "foo") }
- assert_equal before + nr, GC.stat(:total_allocated_objects)
+ assert_no_new_allocations("+ 'str' (PLUS)", nr) do
+ nr.times { res << (recv + "foo") }
+ end
assert_equal [ "somethingfoo" ], res.uniq!
res.clear
- before = GC.stat(:total_allocated_objects)
- nr.times { res << ('a' * 3) }
- assert_equal before + nr, GC.stat(:total_allocated_objects)
+ assert_no_new_allocations("'str' * (MULT)", nr) do
+ nr.times { res << ('a' * 3) }
+ end
assert_equal [ "aaa" ], res.uniq!
end
end
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 11/18] test_string: cleanup tests
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (8 preceding siblings ...)
2014-10-18 2:42 ` [PATCH 10/18] test/ruby/test_string.rb: cleanup allocation tests Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
2014-10-18 2:42 ` [PATCH 12/18] opt_method.inc.tmpl: flatten Eric Wong
` (6 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
---
test/ruby/test_string.rb | 63 ------------------------------------------------
1 file changed, 63 deletions(-)
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index bff55fd..4f47522 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2283,18 +2283,6 @@ class TestString < Test::Unit::TestCase
# enable only when string size range is smaller than memory space
def test_opt_strcat_with
- assert_separately([], <<-RUBY)
- class String
- undef <<
- def <<(str)
- "overridden"
- end
- end
- assert_equal("overridden", "" << "foo")
- foo = "foo"
- assert_equal("overridden", foo << "bar")
- RUBY
-
if @cls == String
nr = 10
recv = ""
@@ -2312,57 +2300,6 @@ class TestString < Test::Unit::TestCase
def test_opt_str_lit
assert_separately([], <<-RUBY)
class String
- undef ==
- def ==(str)
- :TROO
- end
- end
- foo = "foo"
- assert_equal(:TROO, (foo == "foo"), 'string == "peephole 2nd pass"')
- assert_equal(:TROO, ("foo" == foo), '"yoda 1st pass" == string')
- RUBY
-
- assert_separately([], <<-RUBY)
- class String
- undef !=
- def !=(str)
- :NOT
- end
- end
- foo = ""
- assert_equal(:NOT, ("foo" != foo), '"yoda 1st pass" != string')
- assert_equal(:NOT, (foo != "foo"), 'string != "peephole 2nd pass"')
- RUBY
-
- assert_separately([], <<-RUBY)
- class String
- undef size
- undef length
- def size
- 42
- end
- def length
- 42
- end
- end
- assert_equal(42, "".size, 'lit string size')
- assert_equal(42, "".length, 'lit string size')
- RUBY
-
- assert_separately([], <<-RUBY)
- class String
- undef +
- def +(other)
- :plus
- end
- end
- foo = "a"
- assert_equal(:plus, "" + foo, 'lit plus')
- assert_equal(:plus, foo + "", 'plus lit')
- RUBY
-
- assert_separately([], <<-RUBY)
- class String
undef *
def *(other)
:mult
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 12/18] opt_method.inc.tmpl: flatten
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (9 preceding siblings ...)
2014-10-18 2:42 ` [PATCH 11/18] test_string: cleanup tests Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
2014-10-18 2:42 ` [PATCH 13/18] opt_str_lit: switch to type mask for raw class comparisons Eric Wong
` (5 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
---
template/opt_method.inc.tmpl | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/template/opt_method.inc.tmpl b/template/opt_method.inc.tmpl
index 0501121..acbdc1a 100644
--- a/template/opt_method.inc.tmpl
+++ b/template/opt_method.inc.tmpl
@@ -23,17 +23,10 @@ vm_init_redefined_flags(void *tbl)
{
<%
OPT_METHODS.each do |(mid, *classes)|
- classes.each do |klass|
- if Array === klass
- klass.each do |k|
-%>
- add_opt_method(tbl, rb_c<%= k %>, <%= mid %>, <%= om(mid, k) %>);
-<%
- end # klass.each
- else
+ classes.flatten.each do |klass|
%>
add_opt_method(tbl, rb_c<%= klass %>, <%= mid %>, <%= om(mid, klass) %>);
-<% end # !(Array === klass)
+<%
end # classes.each
end # OPT_METHODS.each
%>
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 13/18] opt_str_lit: switch to type mask for raw class comparisons
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (10 preceding siblings ...)
2014-10-18 2:42 ` [PATCH 12/18] opt_method.inc.tmpl: flatten Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
2014-10-18 2:42 ` [PATCH 14/18] opt_str_lit: optimize delete Eric Wong
` (4 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
This allows optimizations for method names common to multiple
classes.
---
compile.c | 22 ++++++++++------
defs/opt_method.def | 4 +++
insns.def | 66 ++++++++++++++++++++++++++++++++--------------
template/opt_method.h.tmpl | 40 ++++++++++++++++++++++++++++
4 files changed, 104 insertions(+), 28 deletions(-)
diff --git a/compile.c b/compile.c
index f1123cc..ff088dd 100644
--- a/compile.c
+++ b/compile.c
@@ -1731,13 +1731,15 @@ new_recvinfo_for_call_(rb_iseq_t *iseq, VALUE str,
}
#define new_recvinfo_for_arg(iseq,str,mid,klass,off) \
- new_recvinfo_for_arg_((iseq),(str),OM_##mid##__##klass,(rb_c##klass),(off))
+ new_recvinfo_for_arg_((iseq),(str),(OM_##mid##__##klass),\
+ (OM_TMASK_##klass),(off))
static VALUE
new_recvinfo_for_arg_(rb_iseq_t *iseq, VALUE str,
- enum ruby_optimized_method om, VALUE klass, int recv_off)
+ enum ruby_optimized_method om,
+ VALUE tmask, int recv_off)
{
VALUE ri = rb_ary_new_from_args(4, str, INT2FIX(om),
- klass, INT2FIX(recv_off));
+ tmask, INT2FIX(recv_off));
hide_obj(ri);
iseq_add_mark_object(iseq, ri);
@@ -1758,10 +1760,14 @@ static VALUE
opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
{
enum ruby_optimized_method om;
- VALUE c;
+ VALUE tmask;
switch (ci->mid) {
-#define C(mid,klass) case mid: om = OM_##mid##__##klass; c = rb_c##klass; break
+#define C(mid,klass) \
+ case mid: \
+ om = OM_##mid##__##klass; \
+ tmask = OM_TMASK_##klass; \
+ break
C(idAREF, Hash);
C(idEq, String);
C(idNeq, String);
@@ -1772,7 +1778,7 @@ opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
default: return Qfalse;
}
- return new_recvinfo_for_arg_(iseq, str, om, c, 0);
+ return new_recvinfo_for_arg_(iseq, str, om, tmask, 0);
}
/*
@@ -1815,12 +1821,12 @@ opt_str_lit_2(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
piobj = (INSN *)get_prev_insn(list);
if (piobj && piobj->insn_id == BIN(putstring)) {
VALUE pstr = piobj->operands[0];
- VALUE pri = new_recvinfo_for_arg_(iseq, pstr, om, rb_cString, 0);
+ VALUE pri = new_recvinfo_for_arg_(iseq, pstr, om, OM_TMASK_String, 0);
piobj->operands[0] = pri;
piobj->insn_id = BIN(opt_str_lit);
}
- return new_recvinfo_for_arg_(iseq, str, om, rb_cString, 1);
+ return new_recvinfo_for_arg_(iseq, str, om, OM_TMASK_String, 1);
}
static int
diff --git a/defs/opt_method.def b/defs/opt_method.def
index e96cc9b..0be4f20 100644
--- a/defs/opt_method.def
+++ b/defs/opt_method.def
@@ -55,3 +55,7 @@ def om(mid, klass)
"OM_#{mid}__#{klass}"
end
end
+
+IS_T_DATA = {
+ "Time" => true
+}
diff --git a/insns.def b/insns.def
index e304338..f2bbaae 100644
--- a/insns.def
+++ b/insns.def
@@ -369,44 +369,70 @@ opt_str_lit
* 0 - str
* 1 - optimized method flag (OM_*)
* optional:
- * 2 - Class (optimized receiver class) or Symbol (method name)
- * 3 - stack offset (Fixint), only present if [3] is a Class,
+ * 2 - class, tmask (optimized receiver classes) or Symbol (method name)
+ * 3 - stack offset (Fixnum), only present if [2] is a tmask or Class,
* -1 stack offset means receiver is the frozen string literal itself
*/
const VALUE *ri = RARRAY_CONST_PTR(recv_info);
long len = RARRAY_LEN(recv_info);
- enum ruby_optimized_method om = FIX2INT(ri[1]);
+ enum ruby_optimized_method om = FIX2UINT(ri[1]);
val = ri[0]; /* hopefully, this is the only val assignment we need */
+
if (len > 2) {
- VALUE msym_or_class = ri[2];
+ VALUE cmask = ri[2];
- /* check if the receiver is an on-stack object: */
- if (!SYMBOL_P(msym_or_class)) {
+ switch (TYPE(cmask)) {
+ case RUBY_T_FIXNUM: { /* tmask, most cases */
int n = FIX2INT(ri[3]);
VALUE recv = n < 0 ? val : TOPN(n);
- if (SPECIAL_CONST_P(recv) ||
- RBASIC_CLASS(recv) != msym_or_class ||
- !rb_basic_op_unredefined_p(om)) {
- /* bad, somebody redefined an optimized method, slow path: */
- val = rb_str_resurrect(val);
+ if (!SPECIAL_CONST_P(recv)) {
+ int tmask = FIX2INT(cmask);
+ enum ruby_value_type btype = BUILTIN_TYPE(recv);
+ int rmask = 1 << btype;
+
+ if ((rmask & tmask) &&
+ (rb_opt_method_class(btype) == RBASIC_CLASS(recv))) {
+ if (rb_opt_method_is_mask(om)) {
+ if (rb_basic_mask_unredefined_p(om)) {
+ goto out;
+ }
+ }
+ else if (rb_basic_op_unredefined_p(om)) {
+ goto out;
+ }
+ }
}
- }
- else { /* receiver is the string literal itself (e.g. "str".freeze) */
- if (!rb_basic_op_unredefined_p(om)) {
- /* bad, somebody redefined an optimized method, slow path: */
+ goto do_resurrect;
+ }
+ case RUBY_T_CLASS: { /* T_DATA oddities (Time#strftime) */
+ int n = FIX2INT(ri[3]);
+ VALUE recv = n < 0 ? val : TOPN(n);
+
+ if (cmask == RBASIC_CLASS(recv) && rb_basic_op_unredefined_p(om)) {
+ goto out;
+ }
+ goto do_resurrect;
+ }
+ case RUBY_T_SYMBOL:
+ /* receiver is the string literal itself: */
+ if (UNLIKELY(!rb_basic_op_unredefined_p(om))) {
val = rb_str_resurrect(val);
- val = rb_funcall(val, SYM2ID(msym_or_class), 0);
+ val = rb_funcall(val, SYM2ID(cmask), 0);
}
+ goto out;
+ default:
+ rb_bug("bad type as cmask: %+"PRIsVALUE, cmask);
}
+ UNREACHABLE;
}
else { /* string lit is receiver, but there are args */
- if (!rb_basic_op_unredefined_p(om)) {
- /* bad, somebody redefined an optimized method, slow path: */
- val = rb_str_resurrect(val);
- }
+ if (rb_basic_op_unredefined_p(om)) goto out;
}
+do_resurrect:
+ val = rb_str_resurrect(val);
+out:
}
/**
diff --git a/template/opt_method.h.tmpl b/template/opt_method.h.tmpl
index 39c4043..ccff31d 100644
--- a/template/opt_method.h.tmpl
+++ b/template/opt_method.h.tmpl
@@ -4,18 +4,21 @@
<%
defs = File.join(File.dirname(File.dirname(erb.filename)), "defs/opt_method.def")
eval(File.read(defs), binding, defs)
+tmasks = []
%>
typedef uint<%= OM_ALIGN %>_t rb_om_bitmap_t;
enum ruby_optimized_method {
<%
opt_masks = {}
+mask_classes = {}
n = 0
OPT_METHODS.each do |(mid, *classes)|
classes.each do |klass|
if Array === klass
opt_masks[mid] = klass.dup
# we will align these in the second loop, below
+ klass.each { |k| mask_classes[k] = true }
next
end %>
<%= om(mid, klass) %> = <%= n += 1 %>,
@@ -61,11 +64,48 @@ opt_masks.each do |mid, c|
')'
%>),
<%
+ # mask for type checking in insns.def, we name this like the OM_*
+ # enum so it is easy to get this name using CPP macros
+ tmasks << [
+ "OM_TMASK_#{c.join('_')}",
+ 'INT2FIX(' +
+ c.map {|k| "(1U << RUBY_T_#{k.upcase})" }.join("|\\\n\t") +
+ ')'
+ ]
end # opt_masks.each
+opt_classes.each_key do |k|
+ if IS_T_DATA[k]
+ tmasks << [ "OM_TMASK_#{k}", "rb_c#{k}" ]
+ else
+ tmasks << [ "OM_TMASK_#{k}", "INT2FIX(1U << RUBY_T_#{k.upcase})" ]
+ end
+end # opt_classes.each_key
%>
OM_ALIGN_ = <%= OM_ALIGN %>,
OM_SIZE_ = <%= ((om_last + OM_ALIGN) & OM_ALIGN_MASK) / OM_ALIGN %>,
OM_GETMASK_ = (1 << OM_ALIGN_) - 1
};
+/* macros */
+<% tmasks.each do |(k,v)| %>
+#define <%= k %> (<%= v %>)
+<% end %>
+
+/* map a raw type to the preferred (optimized) class */
+static inline VALUE
+rb_opt_method_class(enum ruby_value_type type)
+{
+ switch (type) {
+<%
+opt_classes.each_key do |k|
+ next if IS_T_DATA[k]
+%>
+ case RUBY_T_<%= k.upcase %>: return rb_c<%= k %>;
+<%
+end
+%>
+ default: return Qfalse;
+ }
+}
+
#endif /* RUBY_OPT_METHOD_H */
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 14/18] opt_str_lit: optimize delete
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (11 preceding siblings ...)
2014-10-18 2:42 ` [PATCH 13/18] opt_str_lit: switch to type mask for raw class comparisons Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
2014-10-18 2:42 ` [PATCH 15/18] opt_str_lit: optimize include? Eric Wong
` (3 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
---
benchmark/bm_vm2_array_delete_lit.rb | 6 ++++++
benchmark/bm_vm2_hash_delete_lit.rb | 6 ++++++
benchmark/bm_vm2_str_delete.rb | 6 ++++++
compile.c | 1 +
defs/id.def | 1 +
defs/opt_method.def | 1 +
test/ruby/test_optimization.rb | 23 +++++++++++++++++++++++
test/ruby/test_string.rb | 20 ++++++++++++++++++++
8 files changed, 64 insertions(+)
create mode 100644 benchmark/bm_vm2_array_delete_lit.rb
create mode 100644 benchmark/bm_vm2_hash_delete_lit.rb
create mode 100644 benchmark/bm_vm2_str_delete.rb
diff --git a/benchmark/bm_vm2_array_delete_lit.rb b/benchmark/bm_vm2_array_delete_lit.rb
new file mode 100644
index 0000000..60d599a
--- /dev/null
+++ b/benchmark/bm_vm2_array_delete_lit.rb
@@ -0,0 +1,6 @@
+ary = []
+i = 0
+while i<6_000_000 # while loop 2
+ i += 1
+ ary.delete("foo")
+end
diff --git a/benchmark/bm_vm2_hash_delete_lit.rb b/benchmark/bm_vm2_hash_delete_lit.rb
new file mode 100644
index 0000000..22dd95f
--- /dev/null
+++ b/benchmark/bm_vm2_hash_delete_lit.rb
@@ -0,0 +1,6 @@
+h = {}
+i = 0
+while i<6_000_000 # while loop 2
+ i += 1
+ h.delete("foo")
+end
diff --git a/benchmark/bm_vm2_str_delete.rb b/benchmark/bm_vm2_str_delete.rb
new file mode 100644
index 0000000..c242f29
--- /dev/null
+++ b/benchmark/bm_vm2_str_delete.rb
@@ -0,0 +1,6 @@
+str = ''
+i = 0
+while i<6_000_000 # while loop 2
+ i += 1
+ str.delete("foo")
+end
diff --git a/compile.c b/compile.c
index ff088dd..9789a1d 100644
--- a/compile.c
+++ b/compile.c
@@ -1774,6 +1774,7 @@ opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
C(idLTLT, String);
C(idPLUS, String);
C(idEqq, String);
+ C(idDelete, Array_Hash_String);
#undef C
default: return Qfalse;
}
diff --git a/defs/id.def b/defs/id.def
index 21aff93..58df716 100644
--- a/defs/id.def
+++ b/defs/id.def
@@ -65,6 +65,7 @@ firstline, predefined = __LINE__+1, %[\
tr!
tr_s
tr_s!
+ delete
]
class KeywordError < RuntimeError
diff --git a/defs/opt_method.def b/defs/opt_method.def
index 0be4f20..6cd80b2 100644
--- a/defs/opt_method.def
+++ b/defs/opt_method.def
@@ -36,6 +36,7 @@ OPT_METHODS = [
%w(idTr_bang String),
%w(idTr_s String),
%w(idTr_s_bang String),
+ [ "idDelete", %w(Array Hash String) ],
]
# for checking optimized classes,
diff --git a/test/ruby/test_optimization.rb b/test/ruby/test_optimization.rb
index 129f62a..29ca9dd 100644
--- a/test/ruby/test_optimization.rb
+++ b/test/ruby/test_optimization.rb
@@ -140,6 +140,12 @@ class TestRubyOptimization < Test::Unit::TestCase
assert_redefine_method('String', '<<', 'assert_equal "b", "a" << "b"')
end
+ def test_string_delete
+ assert_equal "foo", "foobar".delete("bar")
+ assert_redefine_method('String', 'delete',
+ 'assert_equal "b", "a".delete("b")')
+ end
+
def test_array_plus
assert_equal [1,2], [1]+[2]
assert_redefine_method('Array', '+', 'assert_equal [2], [1]+[2]')
@@ -162,6 +168,15 @@ class TestRubyOptimization < Test::Unit::TestCase
assert_redefine_method('Array', 'empty?', 'assert_nil([].empty?); assert_nil([1,2,3].empty?)')
end
+ def test_array_delete
+ assert_equal "c", %w(a b c).delete("c")
+ assert_redefine_method('Array', 'delete', <<-end)
+ x = []
+ assert_equal x.object_id, [].delete(x).object_id
+ assert_equal x.object_id, %w(a b c).delete(x).object_id
+ end
+ end
+
def test_hash_length
assert_equal 0, {}.length
assert_equal 1, {1=>1}.length
@@ -193,6 +208,14 @@ class TestRubyOptimization < Test::Unit::TestCase
end
end
+ def test_hash_delete
+ assert_equal(1, { "c" => 1 }.delete("c"))
+ assert_redefine_method('Hash', 'delete', <<-end)
+ assert_equal "c", {}.delete("c")
+ assert_equal "c", {"c" => 1}.delete("c")
+ end
+ end
+
class MyObj
def ==(other)
true
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 4f47522..1440ae2 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1917,6 +1917,26 @@ class TestString < Test::Unit::TestCase
end
end
+ def test_literal_delete_args
+ require_compile_option(:peephole_optimization)
+ return unless @cls == String
+ nr = 5
+ objs = [ {}, [] ]
+ assert_no_new_allocations do
+ nr.times { objs.each { |obj| obj.delete("foo") } }
+ end
+ objs = [ {"foo" => 1}, %w(foo) ]
+ assert_no_new_allocations do
+ nr.times { objs.each { |obj| obj.delete("foo") } }
+ end
+
+ str = "foo"
+ assert_no_new_allocations("String#delete", nr * 2) do
+ nr.times { str.delete('O') }
+ nr.times { str.delete('o') }
+ end
+ end
+
class S2 < String
end
def test_str_new4
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 15/18] opt_str_lit: optimize include?
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (12 preceding siblings ...)
2014-10-18 2:42 ` [PATCH 14/18] opt_str_lit: optimize delete Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
2014-10-18 2:42 ` [PATCH 16/18] opt_str_lit: disable optimization on mismatch Eric Wong
` (2 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
---
benchmark/bm_vm2_array_include_lit.rb | 6 ++++++
compile.c | 1 +
defs/id.def | 1 +
defs/opt_method.def | 1 +
test/ruby/test_optimization.rb | 6 ++++++
test/ruby/test_string.rb | 9 +++++++++
6 files changed, 24 insertions(+)
create mode 100644 benchmark/bm_vm2_array_include_lit.rb
diff --git a/benchmark/bm_vm2_array_include_lit.rb b/benchmark/bm_vm2_array_include_lit.rb
new file mode 100644
index 0000000..c81e230
--- /dev/null
+++ b/benchmark/bm_vm2_array_include_lit.rb
@@ -0,0 +1,6 @@
+ary = []
+i = 0
+while i<6_000_000 # while loop 2
+ i += 1
+ ary.include?("foo")
+end
diff --git a/compile.c b/compile.c
index 9789a1d..ff8b061 100644
--- a/compile.c
+++ b/compile.c
@@ -1775,6 +1775,7 @@ opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
C(idPLUS, String);
C(idEqq, String);
C(idDelete, Array_Hash_String);
+ C(idIncludeP, Array_Hash_String);
#undef C
default: return Qfalse;
}
diff --git a/defs/id.def b/defs/id.def
index 58df716..11ec7e5 100644
--- a/defs/id.def
+++ b/defs/id.def
@@ -66,6 +66,7 @@ firstline, predefined = __LINE__+1, %[\
tr_s
tr_s!
delete
+ include?
]
class KeywordError < RuntimeError
diff --git a/defs/opt_method.def b/defs/opt_method.def
index 6cd80b2..4652ac2 100644
--- a/defs/opt_method.def
+++ b/defs/opt_method.def
@@ -37,6 +37,7 @@ OPT_METHODS = [
%w(idTr_s String),
%w(idTr_s_bang String),
[ "idDelete", %w(Array Hash String) ],
+ [ "idIncludeP", %w(Array Hash String) ],
]
# for checking optimized classes,
diff --git a/test/ruby/test_optimization.rb b/test/ruby/test_optimization.rb
index 29ca9dd..49444bc 100644
--- a/test/ruby/test_optimization.rb
+++ b/test/ruby/test_optimization.rb
@@ -146,6 +146,12 @@ class TestRubyOptimization < Test::Unit::TestCase
'assert_equal "b", "a".delete("b")')
end
+ def test_string_include?
+ assert_equal true, "foobar".include?("bar")
+ assert_redefine_method('String', 'include?',
+ 'assert_equal "b", "a".include?("b")')
+ end
+
def test_array_plus
assert_equal [1,2], [1]+[2]
assert_redefine_method('Array', '+', 'assert_equal [2], [1]+[2]')
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 1440ae2..d98355e 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1937,6 +1937,15 @@ class TestString < Test::Unit::TestCase
end
end
+ def test_literal_include_p_args
+ require_compile_option(:peephole_optimization)
+ return unless @cls == String
+ objs = [ {"foo" => 1}, %w(foo), "foo", "", {}, [] ]
+ assert_no_new_allocations do
+ 5.times { objs.each { |obj| obj.include?("foo") } }
+ end
+ end
+
class S2 < String
end
def test_str_new4
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 16/18] opt_str_lit: disable optimization on mismatch
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (13 preceding siblings ...)
2014-10-18 2:42 ` [PATCH 15/18] opt_str_lit: optimize include? Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
2014-10-18 2:42 ` [PATCH 17/18] opt_str_lit: avoid allocations for Time#strftime Eric Wong
2014-10-18 2:42 ` [PATCH 18/18] opt_str_lit: optimize a lot more easy cases (untested) Eric Wong
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
Generally, a call site gets the same receiver type over and over
again, so undoing the optimization after the first mismatch prevents
performance regressions for folks who use Set#include? with string
literal args.
---
benchmark/bm_vm2_set_include_lit.rb | 7 +++++++
compile.c | 33 +++++++++++++++++++++++++++++++++
insns.def | 5 +++++
vm_insnhelper.h | 4 ++++
4 files changed, 49 insertions(+)
create mode 100644 benchmark/bm_vm2_set_include_lit.rb
diff --git a/benchmark/bm_vm2_set_include_lit.rb b/benchmark/bm_vm2_set_include_lit.rb
new file mode 100644
index 0000000..25d8b89
--- /dev/null
+++ b/benchmark/bm_vm2_set_include_lit.rb
@@ -0,0 +1,7 @@
+require 'set'
+set = Set.new
+i = 0
+while i<6_000_000 # while loop 2
+ i += 1
+ set.include?("foo")
+end
diff --git a/compile.c b/compile.c
index ff8b061..348ba3f 100644
--- a/compile.c
+++ b/compile.c
@@ -6096,3 +6096,36 @@ rb_parse_in_main(void)
{
return GET_THREAD()->parse_in_eval < 0;
}
+
+/*
+ * Live bytecode patch:
+ * - opt_str_lit(recv_info)
+ * + putstring(str) # str is recv_info[0]
+ *
+ * If allocation optimization fails at this call site once, assume it
+ * will fail in the future. This prevents performance regressions for
+ * things like #include? calls which may be used with unoptimized
+ * classes (Set,*DBM and many others) as well as optimized core classes
+ * (Array/Hash/String). Call sites which only use optimized core
+ * classes will never get here.
+ */
+void
+rb_undo_opt_str_lit(rb_control_frame_t *cfp)
+{
+ VALUE *insn = cfp->pc - insn_len(BIN(opt_str_lit));
+
+#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
+ const void * const *table = rb_vm_get_insns_address_table();
+
+ assert((VALUE)table[BIN(opt_str_lit)] == insn[0] && "mismatch");
+ insn[0] = (VALUE)table[BIN(putstring)];
+#else
+ assert((VALUE)BIN(opt_str_lit) == insn[0] && "mismatch");
+ insn[0] = (VALUE)BIN(putstring);
+#endif
+ assert(insn_len(BIN(opt_str_lit)) == insn_len(BIN(putstring)));
+ assert(T_ARRAY == BUILTIN_TYPE(insn[1]));
+
+ /* n.b.: recv_info remains marked */
+ insn[1] = RARRAY_AREF(insn[1], 0); /* recv_info[0] == str */
+}
diff --git a/insns.def b/insns.def
index f2bbaae..8855e06 100644
--- a/insns.def
+++ b/insns.def
@@ -420,6 +420,10 @@ opt_str_lit
if (UNLIKELY(!rb_basic_op_unredefined_p(om))) {
val = rb_str_resurrect(val);
val = rb_funcall(val, SYM2ID(cmask), 0);
+ /*
+ * do not bother with: rb_undo_opt_str_lit(GET_CFP());
+ * here, it is crazy to redefine core String methods :P
+ */
}
goto out;
default:
@@ -432,6 +436,7 @@ opt_str_lit
}
do_resurrect:
val = rb_str_resurrect(val);
+ rb_undo_opt_str_lit(GET_CFP());
out:
}
diff --git a/vm_insnhelper.h b/vm_insnhelper.h
index a4290ee..51dd658 100644
--- a/vm_insnhelper.h
+++ b/vm_insnhelper.h
@@ -253,4 +253,8 @@ rb_opt_method_is_mask(enum ruby_optimized_method om)
{
return !!((int)om < 0);
}
+
+/* compile.c */
+void rb_undo_opt_str_lit(rb_control_frame_t *cfp);
+
#endif /* RUBY_INSNHELPER_H */
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 17/18] opt_str_lit: avoid allocations for Time#strftime
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (14 preceding siblings ...)
2014-10-18 2:42 ` [PATCH 16/18] opt_str_lit: disable optimization on mismatch Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
2014-10-18 2:42 ` [PATCH 18/18] opt_str_lit: optimize a lot more easy cases (untested) Eric Wong
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
This may be useful for some folks who want to know the
time very frequently. Eventually, I hope to avoid allocations
of Time objects entirely in the case of Time.now.strftime("...").
---
compile.c | 1 +
defs/id.def | 1 +
defs/opt_method.def | 1 +
3 files changed, 3 insertions(+)
diff --git a/compile.c b/compile.c
index 348ba3f..57b38c1 100644
--- a/compile.c
+++ b/compile.c
@@ -1776,6 +1776,7 @@ opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
C(idEqq, String);
C(idDelete, Array_Hash_String);
C(idIncludeP, Array_Hash_String);
+ C(idStrftime, Time);
#undef C
default: return Qfalse;
}
diff --git a/defs/id.def b/defs/id.def
index 11ec7e5..4271244 100644
--- a/defs/id.def
+++ b/defs/id.def
@@ -67,6 +67,7 @@ firstline, predefined = __LINE__+1, %[\
tr_s!
delete
include?
+ strftime
]
class KeywordError < RuntimeError
diff --git a/defs/opt_method.def b/defs/opt_method.def
index 4652ac2..d3fca06 100644
--- a/defs/opt_method.def
+++ b/defs/opt_method.def
@@ -38,6 +38,7 @@ OPT_METHODS = [
%w(idTr_s_bang String),
[ "idDelete", %w(Array Hash String) ],
[ "idIncludeP", %w(Array Hash String) ],
+ %w(idStrftime Time),
]
# for checking optimized classes,
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 18/18] opt_str_lit: optimize a lot more easy cases (untested)
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
` (15 preceding siblings ...)
2014-10-18 2:42 ` [PATCH 17/18] opt_str_lit: avoid allocations for Time#strftime Eric Wong
@ 2014-10-18 2:42 ` Eric Wong
16 siblings, 0 replies; 18+ messages in thread
From: Eric Wong @ 2014-10-18 2:42 UTC (permalink / raw)
To: spew
There should be a way to auto-generate tests for these...
---
compile.c | 27 +++++++++++++++++++++++++++
defs/id.def | 23 +++++++++++++++++++++++
defs/opt_method.def | 23 +++++++++++++++++++++++
3 files changed, 73 insertions(+)
diff --git a/compile.c b/compile.c
index 57b38c1..d371985 100644
--- a/compile.c
+++ b/compile.c
@@ -1776,7 +1776,29 @@ opt_str_lit_1(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
C(idEqq, String);
C(idDelete, Array_Hash_String);
C(idIncludeP, Array_Hash_String);
+ C(idMemberP, Hash);
+ C(idHas_keyP, Hash);
+ C(idKeyP, Hash);
C(idStrftime, Time);
+ C(idPack, Array);
+ C(idUnpack, String);
+ C(idSplit, String); /* TODO: str.split("lit", num) */
+ C(idJoin, Array);
+ C(idCount, String);
+ C(idChomp, String);
+ C(idChomp_bang, String);
+ C(idSqueeze, String);
+ C(idSqueeze_bang, String);
+ C(idDelete_bang, String);
+ C(idEncode, String);
+ C(idEncode_bang, String);
+ C(idForce_encoding, String);
+ C(idIndex, String); /* TODO: str.index("lit", num) */
+ C(idRindex, String);
+ C(idMatch, String);
+ C(idCasecmp, String);
+ C(idStart_withP, String);
+ C(idEnd_withP, String);
#undef C
default: return Qfalse;
}
@@ -1811,6 +1833,11 @@ opt_str_lit_2(rb_iseq_t *iseq, VALUE str, rb_call_info_t *ci, INSN *list)
C(idTr_bang);
C(idTr_s);
C(idTr_s_bang);
+ C(idInsert); /* String#insert(num, "lit") */
+
+ /* String#encode("dst", "src") */
+ C(idEncode);
+ C(idEncode_bang);
#undef C
default: return Qfalse;
}
diff --git a/defs/id.def b/defs/id.def
index 4271244..43f87c7 100644
--- a/defs/id.def
+++ b/defs/id.def
@@ -66,8 +66,31 @@ firstline, predefined = __LINE__+1, %[\
tr_s
tr_s!
delete
+ delete!
include?
+ member?
+ has_key?
+ key?
+ count
+ chomp
+ chomp!
+ squeeze
+ squeeze!
strftime
+ pack
+ unpack
+ split
+ join
+ encode
+ encode!
+ force_encoding
+ index
+ rindex
+ match
+ casecmp
+ insert
+ start_with?
+ end_with?
]
class KeywordError < RuntimeError
diff --git a/defs/opt_method.def b/defs/opt_method.def
index d3fca06..4aa2c69 100644
--- a/defs/opt_method.def
+++ b/defs/opt_method.def
@@ -38,7 +38,30 @@ OPT_METHODS = [
%w(idTr_s_bang String),
[ "idDelete", %w(Array Hash String) ],
[ "idIncludeP", %w(Array Hash String) ],
+ %w(idMemberP Hash),
+ %w(idKeyP Hash),
+ %w(idHas_keyP Hash),
%w(idStrftime Time),
+ %w(idUnpack String),
+ %w(idPack Array),
+ %w(idSplit String),
+ %w(idJoin Array),
+ %w(idCount String),
+ %w(idChomp String),
+ %w(idChomp_bang String),
+ %w(idSqueeze String),
+ %w(idSqueeze_bang String),
+ %w(idDelete_bang String),
+ %w(idEncode String),
+ %w(idEncode_bang String),
+ %w(idForce_encoding String),
+ %w(idIndex String),
+ %w(idRindex String),
+ %w(idMatch String),
+ %w(idCasecmp String),
+ %w(idInsert String),
+ %w(idStart_withP String),
+ %w(idEnd_withP String),
]
# for checking optimized classes,
--
EW
^ permalink raw reply related [flat|nested] 18+ messages in thread
end of thread, other threads:[~2014-10-18 2:42 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-18 2:41 [PATCH 01/18] compile.c: move "literal" optimizations to peephole optimize Eric Wong
2014-10-18 2:41 ` [PATCH 02/18] add generic and flexible opt_str_lit insn Eric Wong
2014-10-18 2:41 ` [PATCH 03/18] compile.c: optimize << and == using putstring_for Eric Wong
2014-10-18 2:41 ` [PATCH 04/18] opt_str_lit: further optimizations and cleanups Eric Wong
2014-10-18 2:41 ` [PATCH 05/18] opt_str_lit: optimize allocations for +, %, * and === calls Eric Wong
2014-10-18 2:41 ` [PATCH 06/18] vm: automatically define optimized method enums Eric Wong
2014-10-18 2:41 ` [PATCH 07/18] fix mismerge Eric Wong
2014-10-18 2:42 ` [PATCH 08/18] optimize string allocations for sub/gsub/tr/tr_s(!) Eric Wong
2014-10-18 2:42 ` [PATCH 09/18] compile.c (opt_str_lit_1): hoist out of iseq_peephole_optimize Eric Wong
2014-10-18 2:42 ` [PATCH 10/18] test/ruby/test_string.rb: cleanup allocation tests Eric Wong
2014-10-18 2:42 ` [PATCH 11/18] test_string: cleanup tests Eric Wong
2014-10-18 2:42 ` [PATCH 12/18] opt_method.inc.tmpl: flatten Eric Wong
2014-10-18 2:42 ` [PATCH 13/18] opt_str_lit: switch to type mask for raw class comparisons Eric Wong
2014-10-18 2:42 ` [PATCH 14/18] opt_str_lit: optimize delete Eric Wong
2014-10-18 2:42 ` [PATCH 15/18] opt_str_lit: optimize include? Eric Wong
2014-10-18 2:42 ` [PATCH 16/18] opt_str_lit: disable optimization on mismatch Eric Wong
2014-10-18 2:42 ` [PATCH 17/18] opt_str_lit: avoid allocations for Time#strftime Eric Wong
2014-10-18 2:42 ` [PATCH 18/18] opt_str_lit: optimize a lot more easy cases (untested) Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).