dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH] hash.c: improve integer/fixnum hashing
@ 2015-07-29 23:10 Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2015-07-29 23:10 UTC (permalink / raw)
  To: spew

The low bits of Ruby object IDs are rarely populated in the current
implementation, so ensure the get used.

* hash.c (any_hash): skip rb_objid_hash for static syms
  (rb_num_hash_start): extract from rb_ident_hash
  (rb_objid_hash): call rb_num_hash_start
  (rb_ident_hash): ditto

target 0: a (ruby 2.3.0dev (2015-07-30 trunk 51437) [x86_64-linux]
target 1: b (ruby 2.3.0dev (2015-07-30 patch 51437) [x86_64-linux]

benchmark results from Xeon E3-1230 v3 @ 3.30GHz (turbo disabled):
minimum results in each 10 measurements.
Execution time (sec)
name                a       b
hash_aref_dsym        0.316   0.300
hash_aref_dsym_long   5.106   5.063
hash_aref_fix         0.304   0.297
hash_aref_flo         0.061   0.060
hash_aref_miss        0.433   0.430
hash_aref_str         0.408   0.396
hash_aref_sym         0.312   0.306
hash_aref_sym_long    0.482   0.469
hash_flatten          0.385   0.273
hash_ident_flo        0.036   0.037
hash_ident_num        0.277   0.276
hash_ident_obj        0.291   0.284
hash_ident_str        0.289   0.286
hash_ident_sym        0.285   0.281
hash_keys             0.269   0.271
hash_shift            0.020   0.016
hash_values           0.264   0.264
loop_whileloop2       0.101   0.099
vm2_bighash*          3.066   2.972

Speedup ratio: compare with the result of `a' (greater is better)
name                b
hash_aref_dsym        1.052
hash_aref_dsym_long   1.008
hash_aref_fix         1.024
hash_aref_flo         1.015
hash_aref_miss        1.007
hash_aref_str         1.031
hash_aref_sym         1.018
hash_aref_sym_long    1.027
hash_flatten          1.410
hash_ident_flo        0.994
hash_ident_num        1.001
hash_ident_obj        1.022
hash_ident_str        1.012
hash_ident_sym        1.016
hash_keys             0.992
hash_shift            1.237
hash_values           1.001
loop_whileloop2       1.013
vm2_bighash*          1.032
---
 hash.c | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/hash.c b/hash.c
index 4d5b4c6..0da1084 100644
--- a/hash.c
+++ b/hash.c
@@ -138,7 +138,8 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
     if (SPECIAL_CONST_P(a)) {
 	if (a == Qundef) return 0;
 	if (STATIC_SYM_P(a)) {
-	    a >>= (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
+	    hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
+	    goto out;
 	}
 	else if (FLONUM_P(a)) {
 	    /* prevent pathological behavior: [Bug #10761] */
@@ -160,6 +161,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
     else {
 	hnum = other_func(a);
     }
+  out:
     hnum <<= 1;
     return (st_index_t)RSHIFT(hnum, 1);
 }
@@ -177,10 +179,31 @@ rb_any_hash(VALUE a)
     return any_hash(a, obj_any_hash);
 }
 
+static st_index_t
+rb_num_hash_start(st_index_t n)
+{
+    /*
+     * This hash function is lightly-tuned for Ruby.  Further tuning
+     * should be possible.  Notes:
+     *
+     * - (n >> 3) alone is great for heap objects and OK for fixnum,
+     *   however symbols perform poorly.
+     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make
+     *   symbols hash well
+     *   n.b.: +1 (instead of 3) worked well initially, too
+     * - (n << 3) was finally added to avoid losing bits for fixnums
+     * - avoid expensive modulo instructions, it is currently only
+     *   shifts and bitmask operations.
+     */
+    return (n >> (RUBY_SPECIAL_SHIFT + 3) | (n << 3)) ^ (n >> 3);
+}
+
 long
 rb_objid_hash(st_index_t index)
 {
-    st_index_t hnum = rb_hash_start(index);
+    st_index_t hnum = rb_num_hash_start(index);
+
+    hnum = rb_hash_start(hnum);
     hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
     hnum = rb_hash_end(hnum);
     return hnum;
@@ -215,28 +238,18 @@ static const struct st_hash_type objhash = {
 static st_index_t
 rb_ident_hash(st_data_t n)
 {
+#ifdef USE_FLONUM /* RUBY */
     /*
-     * This hash function is lightly-tuned for Ruby.  Further tuning
-     * should be possible.  Notes:
-     *
-     * - (n >> 3) alone is great for heap objects and OK for fixnum,
-     *   however symbols perform poorly.
-     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
-     *   n.b.: +3 to remove ID scope, +1 worked well initially, too
-     * - (n << 3) was finally added to avoid losing bits for fixnums
-     * - avoid expensive modulo instructions, it is currently only
-     *   shifts and bitmask operations.
      * - flonum (on 64-bit) is pathologically bad, mix the actual
      *   float value in, but do not use the float value as-is since
      *   many integers get interpreted as 2.0 or -2.0 [Bug #10761]
      */
-#ifdef USE_FLONUM /* RUBY */
     if (FLONUM_P(n)) {
 	n ^= (st_data_t)rb_float_value(n);
     }
 #endif
 
-    return (st_index_t)((n>>(RUBY_SPECIAL_SHIFT+3)|(n<<3)) ^ (n>>3));
+    return (st_index_t)rb_num_hash_start((st_index_t)n);
 }
 
 static const struct st_hash_type identhash = {
-- 
EW


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH] hash.c: improve integer/fixnum hashing
@ 2015-07-29 23:14 Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2015-07-29 23:14 UTC (permalink / raw)
  To: spew

The low bits of Ruby object IDs are rarely populated in the current
implementation, so ensure the get used.

Early versions of this patch redundantly shifted static symbols in
any_hash, causing regresions with static symbols in hash_aref_sym

* hash.c (any_hash): skip rb_objid_hash for static syms
  (rb_num_hash_start): extract from rb_ident_hash
  (rb_objid_hash): call rb_num_hash_start
  (rb_ident_hash): ditto

target 0: a (ruby 2.3.0dev (2015-07-30 trunk 51437) [x86_64-linux]
target 1: b (ruby 2.3.0dev (2015-07-30 patch 51437) [x86_64-linux]

benchmark results from Xeon E3-1230 v3 @ 3.30GHz (turbo disabled):
minimum results in each 10 measurements.
Execution time (sec)
name                a       b
hash_aref_dsym        0.316   0.300
hash_aref_dsym_long   5.106   5.063
hash_aref_fix         0.304   0.297
hash_aref_flo         0.061   0.060
hash_aref_miss        0.433   0.430
hash_aref_str         0.408   0.396
hash_aref_sym         0.312   0.306
hash_aref_sym_long    0.482   0.469
hash_flatten          0.385   0.273
hash_ident_flo        0.036   0.037
hash_ident_num        0.277   0.276
hash_ident_obj        0.291   0.284
hash_ident_str        0.289   0.286
hash_ident_sym        0.285   0.281
hash_keys             0.269   0.271
hash_shift            0.020   0.016
hash_values           0.264   0.264
loop_whileloop2       0.101   0.099
vm2_bighash*          3.066   2.972

Speedup ratio: compare with the result of `a' (greater is better)
name                b
hash_aref_dsym        1.052
hash_aref_dsym_long   1.008
hash_aref_fix         1.024
hash_aref_flo         1.015
hash_aref_miss        1.007
hash_aref_str         1.031
hash_aref_sym         1.018
hash_aref_sym_long    1.027
hash_flatten          1.410
hash_ident_flo        0.994
hash_ident_num        1.001
hash_ident_obj        1.022
hash_ident_str        1.012
hash_ident_sym        1.016
hash_keys             0.992
hash_shift            1.237
hash_values           1.001
loop_whileloop2       1.013
vm2_bighash*          1.032
---
 hash.c | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/hash.c b/hash.c
index 4d5b4c6..7ab4833 100644
--- a/hash.c
+++ b/hash.c
@@ -138,7 +138,8 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
     if (SPECIAL_CONST_P(a)) {
 	if (a == Qundef) return 0;
 	if (STATIC_SYM_P(a)) {
-	    a >>= (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
+	    hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
+	    goto out;
 	}
 	else if (FLONUM_P(a)) {
 	    /* prevent pathological behavior: [Bug #10761] */
@@ -160,6 +161,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
     else {
 	hnum = other_func(a);
     }
+  out:
     hnum <<= 1;
     return (st_index_t)RSHIFT(hnum, 1);
 }
@@ -177,10 +179,31 @@ rb_any_hash(VALUE a)
     return any_hash(a, obj_any_hash);
 }
 
+static st_index_t
+rb_num_hash_start(st_index_t n)
+{
+    /*
+     * This hash function is lightly-tuned for Ruby.  Further tuning
+     * should be possible.  Notes:
+     *
+     * - (n >> 3) alone is great for heap objects and OK for fixnum,
+     *   however symbols perform poorly.
+     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
+     *   n.b.: +3 to remove most ID scope, +1 worked well initially, too
+     *   n.b.: +1 (instead of 3) worked well initially, too
+     * - (n << 3) was finally added to avoid losing bits for fixnums
+     * - avoid expensive modulo instructions, it is currently only
+     *   shifts and bitmask operations.
+     */
+    return (n >> (RUBY_SPECIAL_SHIFT + 3) | (n << 3)) ^ (n >> 3);
+}
+
 long
 rb_objid_hash(st_index_t index)
 {
-    st_index_t hnum = rb_hash_start(index);
+    st_index_t hnum = rb_num_hash_start(index);
+
+    hnum = rb_hash_start(hnum);
     hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
     hnum = rb_hash_end(hnum);
     return hnum;
@@ -215,28 +238,18 @@ static const struct st_hash_type objhash = {
 static st_index_t
 rb_ident_hash(st_data_t n)
 {
+#ifdef USE_FLONUM /* RUBY */
     /*
-     * This hash function is lightly-tuned for Ruby.  Further tuning
-     * should be possible.  Notes:
-     *
-     * - (n >> 3) alone is great for heap objects and OK for fixnum,
-     *   however symbols perform poorly.
-     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
-     *   n.b.: +3 to remove ID scope, +1 worked well initially, too
-     * - (n << 3) was finally added to avoid losing bits for fixnums
-     * - avoid expensive modulo instructions, it is currently only
-     *   shifts and bitmask operations.
      * - flonum (on 64-bit) is pathologically bad, mix the actual
      *   float value in, but do not use the float value as-is since
      *   many integers get interpreted as 2.0 or -2.0 [Bug #10761]
      */
-#ifdef USE_FLONUM /* RUBY */
     if (FLONUM_P(n)) {
 	n ^= (st_data_t)rb_float_value(n);
     }
 #endif
 
-    return (st_index_t)((n>>(RUBY_SPECIAL_SHIFT+3)|(n<<3)) ^ (n>>3));
+    return (st_index_t)rb_num_hash_start((st_index_t)n);
 }
 
 static const struct st_hash_type identhash = {
-- 
EW


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-07-29 23:14 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-29 23:10 [PATCH] hash.c: improve integer/fixnum hashing Eric Wong
  -- strict thread matches above, loose matches on Subject: below --
2015-07-29 23:14 Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).