From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS8972 188.138.9.0/24 X-Spam-Status: No, score=-1.5 required=3.0 tests=AWL,BAYES_00,RCVD_IN_XBL shortcircuit=no autolearn=no version=3.3.2 X-Original-To: spew@80x24.org Received: from 80x24.org (atlantic480.us.unmetered.com [188.138.9.49]) by dcvr.yhbt.net (Postfix) with ESMTP id 848671FEF7 for ; Wed, 29 Jul 2015 23:14:32 +0000 (UTC) From: Eric Wong To: spew@80x24.org Subject: [PATCH] hash.c: improve integer/fixnum hashing Date: Wed, 29 Jul 2015 23:14:31 +0000 Message-Id: <1438211671-3182-1-git-send-email-e@80x24.org> List-Id: The low bits of Ruby object IDs are rarely populated in the current implementation, so ensure the get used. Early versions of this patch redundantly shifted static symbols in any_hash, causing regresions with static symbols in hash_aref_sym * hash.c (any_hash): skip rb_objid_hash for static syms (rb_num_hash_start): extract from rb_ident_hash (rb_objid_hash): call rb_num_hash_start (rb_ident_hash): ditto target 0: a (ruby 2.3.0dev (2015-07-30 trunk 51437) [x86_64-linux] target 1: b (ruby 2.3.0dev (2015-07-30 patch 51437) [x86_64-linux] benchmark results from Xeon E3-1230 v3 @ 3.30GHz (turbo disabled): minimum results in each 10 measurements. Execution time (sec) name a b hash_aref_dsym 0.316 0.300 hash_aref_dsym_long 5.106 5.063 hash_aref_fix 0.304 0.297 hash_aref_flo 0.061 0.060 hash_aref_miss 0.433 0.430 hash_aref_str 0.408 0.396 hash_aref_sym 0.312 0.306 hash_aref_sym_long 0.482 0.469 hash_flatten 0.385 0.273 hash_ident_flo 0.036 0.037 hash_ident_num 0.277 0.276 hash_ident_obj 0.291 0.284 hash_ident_str 0.289 0.286 hash_ident_sym 0.285 0.281 hash_keys 0.269 0.271 hash_shift 0.020 0.016 hash_values 0.264 0.264 loop_whileloop2 0.101 0.099 vm2_bighash* 3.066 2.972 Speedup ratio: compare with the result of `a' (greater is better) name b hash_aref_dsym 1.052 hash_aref_dsym_long 1.008 hash_aref_fix 1.024 hash_aref_flo 1.015 hash_aref_miss 1.007 hash_aref_str 1.031 hash_aref_sym 1.018 hash_aref_sym_long 1.027 hash_flatten 1.410 hash_ident_flo 0.994 hash_ident_num 1.001 hash_ident_obj 1.022 hash_ident_str 1.012 hash_ident_sym 1.016 hash_keys 0.992 hash_shift 1.237 hash_values 1.001 loop_whileloop2 1.013 vm2_bighash* 1.032 --- hash.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/hash.c b/hash.c index 4d5b4c6..7ab4833 100644 --- a/hash.c +++ b/hash.c @@ -138,7 +138,8 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE)) if (SPECIAL_CONST_P(a)) { if (a == Qundef) return 0; if (STATIC_SYM_P(a)) { - a >>= (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT); + hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT); + goto out; } else if (FLONUM_P(a)) { /* prevent pathological behavior: [Bug #10761] */ @@ -160,6 +161,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE)) else { hnum = other_func(a); } + out: hnum <<= 1; return (st_index_t)RSHIFT(hnum, 1); } @@ -177,10 +179,31 @@ rb_any_hash(VALUE a) return any_hash(a, obj_any_hash); } +static st_index_t +rb_num_hash_start(st_index_t n) +{ + /* + * This hash function is lightly-tuned for Ruby. Further tuning + * should be possible. Notes: + * + * - (n >> 3) alone is great for heap objects and OK for fixnum, + * however symbols perform poorly. + * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well, + * n.b.: +3 to remove most ID scope, +1 worked well initially, too + * n.b.: +1 (instead of 3) worked well initially, too + * - (n << 3) was finally added to avoid losing bits for fixnums + * - avoid expensive modulo instructions, it is currently only + * shifts and bitmask operations. + */ + return (n >> (RUBY_SPECIAL_SHIFT + 3) | (n << 3)) ^ (n >> 3); +} + long rb_objid_hash(st_index_t index) { - st_index_t hnum = rb_hash_start(index); + st_index_t hnum = rb_num_hash_start(index); + + hnum = rb_hash_start(hnum); hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash); hnum = rb_hash_end(hnum); return hnum; @@ -215,28 +238,18 @@ static const struct st_hash_type objhash = { static st_index_t rb_ident_hash(st_data_t n) { +#ifdef USE_FLONUM /* RUBY */ /* - * This hash function is lightly-tuned for Ruby. Further tuning - * should be possible. Notes: - * - * - (n >> 3) alone is great for heap objects and OK for fixnum, - * however symbols perform poorly. - * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well, - * n.b.: +3 to remove ID scope, +1 worked well initially, too - * - (n << 3) was finally added to avoid losing bits for fixnums - * - avoid expensive modulo instructions, it is currently only - * shifts and bitmask operations. * - flonum (on 64-bit) is pathologically bad, mix the actual * float value in, but do not use the float value as-is since * many integers get interpreted as 2.0 or -2.0 [Bug #10761] */ -#ifdef USE_FLONUM /* RUBY */ if (FLONUM_P(n)) { n ^= (st_data_t)rb_float_value(n); } #endif - return (st_index_t)((n>>(RUBY_SPECIAL_SHIFT+3)|(n<<3)) ^ (n>>3)); + return (st_index_t)rb_num_hash_start((st_index_t)n); } static const struct st_hash_type identhash = { -- EW