From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] hash.c: improve integer/fixnum hashing
Date: Wed, 29 Jul 2015 23:10:11 +0000 [thread overview]
Message-ID: <1438211411-2776-1-git-send-email-e@80x24.org> (raw)
The low bits of Ruby object IDs are rarely populated in the current
implementation, so ensure the get used.
* hash.c (any_hash): skip rb_objid_hash for static syms
(rb_num_hash_start): extract from rb_ident_hash
(rb_objid_hash): call rb_num_hash_start
(rb_ident_hash): ditto
target 0: a (ruby 2.3.0dev (2015-07-30 trunk 51437) [x86_64-linux]
target 1: b (ruby 2.3.0dev (2015-07-30 patch 51437) [x86_64-linux]
benchmark results from Xeon E3-1230 v3 @ 3.30GHz (turbo disabled):
minimum results in each 10 measurements.
Execution time (sec)
name a b
hash_aref_dsym 0.316 0.300
hash_aref_dsym_long 5.106 5.063
hash_aref_fix 0.304 0.297
hash_aref_flo 0.061 0.060
hash_aref_miss 0.433 0.430
hash_aref_str 0.408 0.396
hash_aref_sym 0.312 0.306
hash_aref_sym_long 0.482 0.469
hash_flatten 0.385 0.273
hash_ident_flo 0.036 0.037
hash_ident_num 0.277 0.276
hash_ident_obj 0.291 0.284
hash_ident_str 0.289 0.286
hash_ident_sym 0.285 0.281
hash_keys 0.269 0.271
hash_shift 0.020 0.016
hash_values 0.264 0.264
loop_whileloop2 0.101 0.099
vm2_bighash* 3.066 2.972
Speedup ratio: compare with the result of `a' (greater is better)
name b
hash_aref_dsym 1.052
hash_aref_dsym_long 1.008
hash_aref_fix 1.024
hash_aref_flo 1.015
hash_aref_miss 1.007
hash_aref_str 1.031
hash_aref_sym 1.018
hash_aref_sym_long 1.027
hash_flatten 1.410
hash_ident_flo 0.994
hash_ident_num 1.001
hash_ident_obj 1.022
hash_ident_str 1.012
hash_ident_sym 1.016
hash_keys 0.992
hash_shift 1.237
hash_values 1.001
loop_whileloop2 1.013
vm2_bighash* 1.032
---
hash.c | 41 +++++++++++++++++++++++++++--------------
1 file changed, 27 insertions(+), 14 deletions(-)
diff --git a/hash.c b/hash.c
index 4d5b4c6..0da1084 100644
--- a/hash.c
+++ b/hash.c
@@ -138,7 +138,8 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
if (SPECIAL_CONST_P(a)) {
if (a == Qundef) return 0;
if (STATIC_SYM_P(a)) {
- a >>= (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
+ hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
+ goto out;
}
else if (FLONUM_P(a)) {
/* prevent pathological behavior: [Bug #10761] */
@@ -160,6 +161,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
else {
hnum = other_func(a);
}
+ out:
hnum <<= 1;
return (st_index_t)RSHIFT(hnum, 1);
}
@@ -177,10 +179,31 @@ rb_any_hash(VALUE a)
return any_hash(a, obj_any_hash);
}
+static st_index_t
+rb_num_hash_start(st_index_t n)
+{
+ /*
+ * This hash function is lightly-tuned for Ruby. Further tuning
+ * should be possible. Notes:
+ *
+ * - (n >> 3) alone is great for heap objects and OK for fixnum,
+ * however symbols perform poorly.
+ * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make
+ * symbols hash well
+ * n.b.: +1 (instead of 3) worked well initially, too
+ * - (n << 3) was finally added to avoid losing bits for fixnums
+ * - avoid expensive modulo instructions, it is currently only
+ * shifts and bitmask operations.
+ */
+ return (n >> (RUBY_SPECIAL_SHIFT + 3) | (n << 3)) ^ (n >> 3);
+}
+
long
rb_objid_hash(st_index_t index)
{
- st_index_t hnum = rb_hash_start(index);
+ st_index_t hnum = rb_num_hash_start(index);
+
+ hnum = rb_hash_start(hnum);
hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
hnum = rb_hash_end(hnum);
return hnum;
@@ -215,28 +238,18 @@ static const struct st_hash_type objhash = {
static st_index_t
rb_ident_hash(st_data_t n)
{
+#ifdef USE_FLONUM /* RUBY */
/*
- * This hash function is lightly-tuned for Ruby. Further tuning
- * should be possible. Notes:
- *
- * - (n >> 3) alone is great for heap objects and OK for fixnum,
- * however symbols perform poorly.
- * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
- * n.b.: +3 to remove ID scope, +1 worked well initially, too
- * - (n << 3) was finally added to avoid losing bits for fixnums
- * - avoid expensive modulo instructions, it is currently only
- * shifts and bitmask operations.
* - flonum (on 64-bit) is pathologically bad, mix the actual
* float value in, but do not use the float value as-is since
* many integers get interpreted as 2.0 or -2.0 [Bug #10761]
*/
-#ifdef USE_FLONUM /* RUBY */
if (FLONUM_P(n)) {
n ^= (st_data_t)rb_float_value(n);
}
#endif
- return (st_index_t)((n>>(RUBY_SPECIAL_SHIFT+3)|(n<<3)) ^ (n>>3));
+ return (st_index_t)rb_num_hash_start((st_index_t)n);
}
static const struct st_hash_type identhash = {
--
EW
next reply other threads:[~2015-07-29 23:10 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-29 23:10 Eric Wong [this message]
-- strict thread matches above, loose matches on Subject: below --
2015-07-29 23:14 [PATCH] hash.c: improve integer/fixnum hashing Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1438211411-2776-1-git-send-email-e@80x24.org \
--to=e@80x24.org \
--cc=spew@80x24.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).