dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH] hash.c: improve integer hashing
@ 2015-07-28 21:16 Eric Wong
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2015-07-28 21:16 UTC (permalink / raw)
  To: spew

The low bits of Ruby object IDs are rarely populated in the current
implementation, so

* hash.c (rb_objid_hash): use hash from rb_ident_hash
  (rb_ident_hash): call rb_objid_hash
---
 hash.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/hash.c b/hash.c
index 5b13d98..f46662a 100644
--- a/hash.c
+++ b/hash.c
@@ -163,10 +163,30 @@ rb_any_hash(VALUE a)
     return (st_index_t)RSHIFT(hnum, 1);
 }
 
+static st_index_t
+rb_num_hash_start(st_index_t n)
+{
+    /*
+     * This hash function is lightly-tuned for Ruby.  Further tuning
+     * should be possible.  Notes:
+     *
+     * - (n >> 3) alone is great for heap objects and OK for fixnum,
+     *   however symbols perform poorly.
+     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
+     *   n.b.: +3 to remove ID scope, +1 worked well initially, too
+     * - (n << 3) was finally added to avoid losing bits for fixnums
+     * - avoid expensive modulo instructions, it is currently only
+     *   shifts and bitmask operations.
+     */
+    return (n >> (RUBY_SPECIAL_SHIFT + 3) | (n << 3)) ^ (n >> 3);
+}
+
 long
 rb_objid_hash(st_index_t index)
 {
-    st_index_t hnum = rb_hash_start(index);
+    st_index_t hnum = rb_num_hash_start(index);
+
+    hnum = rb_hash_start(hnum);
     hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
     hnum = rb_hash_end(hnum);
     return hnum;
@@ -188,28 +208,18 @@ static const struct st_hash_type objhash = {
 static st_index_t
 rb_ident_hash(st_data_t n)
 {
+#ifdef USE_FLONUM /* RUBY */
     /*
-     * This hash function is lightly-tuned for Ruby.  Further tuning
-     * should be possible.  Notes:
-     *
-     * - (n >> 3) alone is great for heap objects and OK for fixnum,
-     *   however symbols perform poorly.
-     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
-     *   n.b.: +3 to remove ID scope, +1 worked well initially, too
-     * - (n << 3) was finally added to avoid losing bits for fixnums
-     * - avoid expensive modulo instructions, it is currently only
-     *   shifts and bitmask operations.
      * - flonum (on 64-bit) is pathologically bad, mix the actual
      *   float value in, but do not use the float value as-is since
      *   many integers get interpreted as 2.0 or -2.0 [Bug #10761]
      */
-#ifdef USE_FLONUM /* RUBY */
     if (FLONUM_P(n)) {
 	n ^= (st_data_t)rb_float_value(n);
     }
 #endif
 
-    return (st_index_t)((n>>(RUBY_SPECIAL_SHIFT+3)|(n<<3)) ^ (n>>3));
+    return (st_index_t)rb_num_hash_start((st_index_t)n);
 }
 
 static const struct st_hash_type identhash = {
-- 
EW


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH] hash.c: improve integer hashing
@ 2015-07-29 19:58 Eric Wong
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2015-07-29 19:58 UTC (permalink / raw)
  To: spew

The low bits of Ruby object IDs are rarely populated in the current
implementation, so ensure the get used.

* hash.c (rb_num_hash_start): extract from rb_ident_hash
  (rb_objid_hash): call rb_num_hash_start
  (rb_ident_hash): ditto
---
 hash.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/hash.c b/hash.c
index 4d5b4c6..062805b 100644
--- a/hash.c
+++ b/hash.c
@@ -177,10 +177,30 @@ rb_any_hash(VALUE a)
     return any_hash(a, obj_any_hash);
 }
 
+static st_index_t
+rb_num_hash_start(st_index_t n)
+{
+    /*
+     * This hash function is lightly-tuned for Ruby.  Further tuning
+     * should be possible.  Notes:
+     *
+     * - (n >> 3) alone is great for heap objects and OK for fixnum,
+     *   however symbols perform poorly.
+     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
+     *   n.b.: +3 to remove ID scope, +1 worked well initially, too
+     * - (n << 3) was finally added to avoid losing bits for fixnums
+     * - avoid expensive modulo instructions, it is currently only
+     *   shifts and bitmask operations.
+     */
+    return (n >> (RUBY_SPECIAL_SHIFT + 3) | (n << 3)) ^ (n >> 3);
+}
+
 long
 rb_objid_hash(st_index_t index)
 {
-    st_index_t hnum = rb_hash_start(index);
+    st_index_t hnum = rb_num_hash_start(index);
+
+    hnum = rb_hash_start(hnum);
     hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
     hnum = rb_hash_end(hnum);
     return hnum;
@@ -215,28 +235,18 @@ static const struct st_hash_type objhash = {
 static st_index_t
 rb_ident_hash(st_data_t n)
 {
+#ifdef USE_FLONUM /* RUBY */
     /*
-     * This hash function is lightly-tuned for Ruby.  Further tuning
-     * should be possible.  Notes:
-     *
-     * - (n >> 3) alone is great for heap objects and OK for fixnum,
-     *   however symbols perform poorly.
-     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
-     *   n.b.: +3 to remove ID scope, +1 worked well initially, too
-     * - (n << 3) was finally added to avoid losing bits for fixnums
-     * - avoid expensive modulo instructions, it is currently only
-     *   shifts and bitmask operations.
      * - flonum (on 64-bit) is pathologically bad, mix the actual
      *   float value in, but do not use the float value as-is since
      *   many integers get interpreted as 2.0 or -2.0 [Bug #10761]
      */
-#ifdef USE_FLONUM /* RUBY */
     if (FLONUM_P(n)) {
 	n ^= (st_data_t)rb_float_value(n);
     }
 #endif
 
-    return (st_index_t)((n>>(RUBY_SPECIAL_SHIFT+3)|(n<<3)) ^ (n>>3));
+    return (st_index_t)rb_num_hash_start((st_index_t)n);
 }
 
 static const struct st_hash_type identhash = {
-- 
EW


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH] hash.c: improve integer hashing
@ 2015-07-29 22:20 Eric Wong
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2015-07-29 22:20 UTC (permalink / raw)
  To: spew

The low bits of Ruby object IDs are rarely populated in the current
implementation, so ensure the get used.

While we're at it, use ID_SCOPE_SHIFT properly instead of
hard-coding the outdated value of 3  into rb_num_hash_start.
Earlier versions of this patch failed to do this, and the redundant
ID_SCOPE_SHIFT in any_hash hurt performance.

* hash.c (any_hash): rely on rb_objid_hash for scope shift
  (rb_num_hash_start): extract from rb_ident_hash
  (rb_objid_hash): call rb_num_hash_start
  (rb_ident_hash): ditto
---
 hash.c | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/hash.c b/hash.c
index 4d5b4c6..fdd184e 100644
--- a/hash.c
+++ b/hash.c
@@ -137,10 +137,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
 
     if (SPECIAL_CONST_P(a)) {
 	if (a == Qundef) return 0;
-	if (STATIC_SYM_P(a)) {
-	    a >>= (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
-	}
-	else if (FLONUM_P(a)) {
+	if (FLONUM_P(a)) {
 	    /* prevent pathological behavior: [Bug #10761] */
 	    goto flt;
 	}
@@ -177,10 +174,31 @@ rb_any_hash(VALUE a)
     return any_hash(a, obj_any_hash);
 }
 
+static st_index_t
+rb_num_hash_start(st_index_t n)
+{
+    /*
+     * This hash function is lightly-tuned for Ruby.  Further tuning
+     * should be possible.  Notes:
+     *
+     * - (n >> 3) alone is great for heap objects and OK for fixnum,
+     *   however symbols perform poorly.
+     * - (n >> (RUBY_SPECIAL_SHIFT+ID_SCOPE_SHIFT)) was added to make
+     *   symbols hash well
+     *   n.b.: +1 (instead of ID_SCOPE_SHIFT) worked well initially, too
+     * - (n << 3) was finally added to avoid losing bits for fixnums
+     * - avoid expensive modulo instructions, it is currently only
+     *   shifts and bitmask operations.
+     */
+    return (n >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT) | (n << 3)) ^ (n >> 3);
+}
+
 long
 rb_objid_hash(st_index_t index)
 {
-    st_index_t hnum = rb_hash_start(index);
+    st_index_t hnum = rb_num_hash_start(index);
+
+    hnum = rb_hash_start(hnum);
     hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
     hnum = rb_hash_end(hnum);
     return hnum;
@@ -215,28 +233,18 @@ static const struct st_hash_type objhash = {
 static st_index_t
 rb_ident_hash(st_data_t n)
 {
+#ifdef USE_FLONUM /* RUBY */
     /*
-     * This hash function is lightly-tuned for Ruby.  Further tuning
-     * should be possible.  Notes:
-     *
-     * - (n >> 3) alone is great for heap objects and OK for fixnum,
-     *   however symbols perform poorly.
-     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
-     *   n.b.: +3 to remove ID scope, +1 worked well initially, too
-     * - (n << 3) was finally added to avoid losing bits for fixnums
-     * - avoid expensive modulo instructions, it is currently only
-     *   shifts and bitmask operations.
      * - flonum (on 64-bit) is pathologically bad, mix the actual
      *   float value in, but do not use the float value as-is since
      *   many integers get interpreted as 2.0 or -2.0 [Bug #10761]
      */
-#ifdef USE_FLONUM /* RUBY */
     if (FLONUM_P(n)) {
 	n ^= (st_data_t)rb_float_value(n);
     }
 #endif
 
-    return (st_index_t)((n>>(RUBY_SPECIAL_SHIFT+3)|(n<<3)) ^ (n>>3));
+    return (st_index_t)rb_num_hash_start((st_index_t)n);
 }
 
 static const struct st_hash_type identhash = {
-- 
EW


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH] hash.c: improve integer hashing
@ 2015-07-29 22:46 Eric Wong
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2015-07-29 22:46 UTC (permalink / raw)
  To: spew

The low bits of Ruby object IDs are rarely populated in the current
implementation, so ensure the get used.

While we're at it, use ID_SCOPE_SHIFT properly instead of
hard-coding the outdated value of 3  into rb_num_hash_start.
Earlier versions of this patch failed to do this, and the redundant
ID_SCOPE_SHIFT in any_hash hurt performance.

* hash.c (any_hash): rely on rb_objid_hash for scope shift
  (rb_num_hash_start): extract from rb_ident_hash
  (rb_objid_hash): call rb_num_hash_start
  (rb_ident_hash): ditto
---
 hash.c | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/hash.c b/hash.c
index 4d5b4c6..0da1084 100644
--- a/hash.c
+++ b/hash.c
@@ -138,7 +138,8 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
     if (SPECIAL_CONST_P(a)) {
 	if (a == Qundef) return 0;
 	if (STATIC_SYM_P(a)) {
-	    a >>= (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
+	    hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
+	    goto out;
 	}
 	else if (FLONUM_P(a)) {
 	    /* prevent pathological behavior: [Bug #10761] */
@@ -160,6 +161,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
     else {
 	hnum = other_func(a);
     }
+  out:
     hnum <<= 1;
     return (st_index_t)RSHIFT(hnum, 1);
 }
@@ -177,10 +179,31 @@ rb_any_hash(VALUE a)
     return any_hash(a, obj_any_hash);
 }
 
+static st_index_t
+rb_num_hash_start(st_index_t n)
+{
+    /*
+     * This hash function is lightly-tuned for Ruby.  Further tuning
+     * should be possible.  Notes:
+     *
+     * - (n >> 3) alone is great for heap objects and OK for fixnum,
+     *   however symbols perform poorly.
+     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make
+     *   symbols hash well
+     *   n.b.: +1 (instead of 3) worked well initially, too
+     * - (n << 3) was finally added to avoid losing bits for fixnums
+     * - avoid expensive modulo instructions, it is currently only
+     *   shifts and bitmask operations.
+     */
+    return (n >> (RUBY_SPECIAL_SHIFT + 3) | (n << 3)) ^ (n >> 3);
+}
+
 long
 rb_objid_hash(st_index_t index)
 {
-    st_index_t hnum = rb_hash_start(index);
+    st_index_t hnum = rb_num_hash_start(index);
+
+    hnum = rb_hash_start(hnum);
     hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
     hnum = rb_hash_end(hnum);
     return hnum;
@@ -215,28 +238,18 @@ static const struct st_hash_type objhash = {
 static st_index_t
 rb_ident_hash(st_data_t n)
 {
+#ifdef USE_FLONUM /* RUBY */
     /*
-     * This hash function is lightly-tuned for Ruby.  Further tuning
-     * should be possible.  Notes:
-     *
-     * - (n >> 3) alone is great for heap objects and OK for fixnum,
-     *   however symbols perform poorly.
-     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
-     *   n.b.: +3 to remove ID scope, +1 worked well initially, too
-     * - (n << 3) was finally added to avoid losing bits for fixnums
-     * - avoid expensive modulo instructions, it is currently only
-     *   shifts and bitmask operations.
      * - flonum (on 64-bit) is pathologically bad, mix the actual
      *   float value in, but do not use the float value as-is since
      *   many integers get interpreted as 2.0 or -2.0 [Bug #10761]
      */
-#ifdef USE_FLONUM /* RUBY */
     if (FLONUM_P(n)) {
 	n ^= (st_data_t)rb_float_value(n);
     }
 #endif
 
-    return (st_index_t)((n>>(RUBY_SPECIAL_SHIFT+3)|(n<<3)) ^ (n>>3));
+    return (st_index_t)rb_num_hash_start((st_index_t)n);
 }
 
 static const struct st_hash_type identhash = {
-- 
EW


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-07-29 22:46 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-29 19:58 [PATCH] hash.c: improve integer hashing Eric Wong
  -- strict thread matches above, loose matches on Subject: below --
2015-07-29 22:46 Eric Wong
2015-07-29 22:20 Eric Wong
2015-07-28 21:16 Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).