dumping ground for random patches and texts
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] memoize hashval for RSymbol
Date: Mon, 27 Jul 2015 10:17:50 +0000	[thread overview]
Message-ID: <1437992270-20549-1-git-send-email-e@80x24.org> (raw)

This speeds up the hash function for dynamic symbols, bringing
performance nearly back up to Ruby 2.1 levels.
[ruby-core:70129] [Bug #11396]

Power-of-two hash sizing [Feature #9425] speeds up cases where we
have a good hash, but this means we can no longer hide behind weak
hashes.  Unfortunately, object IDs do not hash well, but we may
use the extra space in the RSymbol struct to memoize the hash value.

Further optimizations should be possible.  For now, the st.c APIs
force us to calculate rb_str_hash redundantly at dsym registration.
---
 common.mk | 1 +
 hash.c    | 3 ++-
 symbol.c  | 6 ++++++
 symbol.h  | 1 +
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/common.mk b/common.mk
index 9d00291..46d8a2a 100644
--- a/common.mk
+++ b/common.mk
@@ -1532,6 +1532,7 @@ hash.$(OBJEXT): {$(VPATH)}oniguruma.h
 hash.$(OBJEXT): {$(VPATH)}probes.h
 hash.$(OBJEXT): {$(VPATH)}st.h
 hash.$(OBJEXT): {$(VPATH)}subst.h
+hash.$(OBJEXT): {$(VPATH)}symbol.h
 hash.$(OBJEXT): {$(VPATH)}util.h
 hash.$(OBJEXT): {$(VPATH)}vm_opts.h
 inits.$(OBJEXT): $(hdrdir)/ruby/ruby.h
diff --git a/hash.c b/hash.c
index 7b8733f..5b13d98 100644
--- a/hash.c
+++ b/hash.c
@@ -17,6 +17,7 @@
 #include <errno.h>
 #include "probes.h"
 #include "id.h"
+#include "symbol.h"
 
 #ifdef __APPLE__
 # ifdef HAVE_CRT_EXTERNS_H
@@ -149,7 +150,7 @@ rb_any_hash(VALUE a)
 	hnum = rb_str_hash(a);
     }
     else if (BUILTIN_TYPE(a) == T_SYMBOL) {
-	hnum = rb_objid_hash((st_index_t)a);
+	return RSYMBOL(a)->hashval;
     }
     else if (BUILTIN_TYPE(a) == T_FLOAT) {
 	return rb_dbl_hash(rb_float_value(a));
diff --git a/symbol.c b/symbol.c
index 9fbe3dd..9e2fccd 100644
--- a/symbol.c
+++ b/symbol.c
@@ -505,12 +505,18 @@ static VALUE
 dsymbol_alloc(const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type)
 {
     const VALUE dsym = rb_newobj_of(klass, T_SYMBOL | FL_WB_PROTECTED);
+    st_index_t hashval;
 
     rb_enc_associate(dsym, enc);
     OBJ_FREEZE(dsym);
     RB_OBJ_WRITE(dsym, &RSYMBOL(dsym)->fstr, str);
     RSYMBOL(dsym)->id = type;
 
+    /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */
+    hashval = rb_str_hash(str);
+    hashval <<= 1;
+    RSYMBOL(dsym)->hashval = (st_index_t)RSHIFT(hashval, 1);
+
     register_sym(str, dsym);
     rb_hash_aset(global_symbols.dsymbol_fstr_hash, str, Qtrue);
 
diff --git a/symbol.h b/symbol.h
index 549eabd..5c52b97 100644
--- a/symbol.h
+++ b/symbol.h
@@ -25,6 +25,7 @@
 
 struct RSymbol {
     struct RBasic basic;
+    st_index_t hashval;
     VALUE fstr;
     ID id;
 };
-- 
EW


                 reply	other threads:[~2015-07-27 10:17 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1437992270-20549-1-git-send-email-e@80x24.org \
    --to=e@80x24.org \
    --cc=spew@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).