From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 180471F461 for ; Sun, 24 Mar 2024 23:05:43 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1711321543; bh=YdjT6ecStygrTkhTb71mMertOXm5umPH3diiTa2Kzao=; h=From:To:Subject:Date:In-Reply-To:References:From; b=Hq/Fqeo6C3gYQtNPKQ5wLLo4LJHGrFICkH35ZvgvSsLT2iP7ZyTxcESE7hQPQ2bIB eUvClAjTiWxe/5V9+BtK83Gso89uk3Ncb14m9OSlZv4VBNEj0F5Zl8df409FiGkJic S39KpcN+S3GWAvWe0DS07jXx41HpZyYCWBh6tlpI= From: Eric Wong To: spew@80x24.org Subject: [PATCH 2/2] switch to khashl ensemble Date: Sun, 24 Mar 2024 23:05:42 +0000 Message-ID: <20240324230542.2364726-2-e@80x24.org> In-Reply-To: <20240324230542.2364726-1-e@80x24.org> References: <20240324230542.2364726-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Using an ensemble of hash tables to implement a larger one reduces the space required to resize a hash table and lowers overhead. Instead of doubling the size of the entire table, only a sub-hash is doubled in size when growing. --- builtin/fast-import.c | 10 ++++---- builtin/replay.c | 10 ++++---- delta-islands.c | 54 +++++++++++++++++++++---------------------- fsck.c | 10 ++++---- khashl.h | 4 ++-- pack-bitmap-write.c | 4 ++-- pack-bitmap.c | 32 ++++++++++++------------- 7 files changed, 62 insertions(+), 62 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 29e50fd675..190e136e2e 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2198,7 +2198,7 @@ static uintmax_t change_note_fanout(struct tree_entry *root, static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const char **end) { int algo; - khiter_t it; + kh_ensitr_t it; /* Make SHA-1 object IDs have all-zero padding. */ memset(oid->hash, 0, sizeof(oid->hash)); @@ -2209,13 +2209,13 @@ static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const ch it = kh_get_oid_map(sub_oid_map, *oid); /* No such object? */ - if (it == kh_end(sub_oid_map)) { + if (kh_ens_is_end(it)) { /* If we're using the same algorithm, pass it through. */ if (hash_algos[algo].format_id == the_hash_algo->format_id) return 0; return -1; } - oidcpy(oid, kh_value(sub_oid_map, it)); + oidcpy(oid, kh_ens_val(sub_oid_map, it)); return 0; } @@ -3083,13 +3083,13 @@ static void insert_mapped_mark(uintmax_t mark, void *object, void *cbp) struct object_id *fromoid = object; struct object_id *tooid = find_mark(cbp, mark); int ret; - khiter_t it; + kh_ensitr_t it; it = kh_put_oid_map(sub_oid_map, *fromoid, &ret); /* We've already seen this object. */ if (ret == 0) return; - kh_value(sub_oid_map, it) = tooid; + kh_ens_val(sub_oid_map, it) = tooid; } static void build_mark_map_one(struct mark_set *from, struct mark_set *to) diff --git a/builtin/replay.c b/builtin/replay.c index 6bc4b47f09..e084da8a94 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -227,10 +227,10 @@ static struct commit *mapped_commit(kh_oid_map_t *replayed_commits, struct commit *commit, struct commit *fallback) { - khint_t pos = kh_get_oid_map(replayed_commits, commit->object.oid); - if (pos == kh_end(replayed_commits)) + kh_ensitr_t pos = kh_get_oid_map(replayed_commits, commit->object.oid); + if (kh_ens_is_end(pos)) return fallback; - return kh_value(replayed_commits, pos); + return kh_ens_val(replayed_commits, pos); } static struct commit *pick_regular_commit(struct commit *pickme, @@ -381,7 +381,7 @@ int cmd_replay(int argc, const char **argv, const char *prefix) replayed_commits = kh_init_oid_map(); while ((commit = get_revision(&revs))) { const struct name_decoration *decoration; - khint_t pos; + kh_ensitr_t pos; int hr; if (!commit->parents) @@ -399,7 +399,7 @@ int cmd_replay(int argc, const char **argv, const char *prefix) if (hr == 0) BUG("Duplicate rewritten commit: %s\n", oid_to_hex(&commit->object.oid)); - kh_value(replayed_commits, pos) = last_commit; + kh_ens_val(replayed_commits, pos) = last_commit; /* Update any necessary branches */ if (advance_name) diff --git a/delta-islands.c b/delta-islands.c index aa35839f15..de159e98a8 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -90,7 +90,7 @@ static int island_bitmap_get(struct island_bitmap *self, uint32_t i) int in_same_island(const struct object_id *trg_oid, const struct object_id *src_oid) { - khiter_t trg_pos, src_pos; + kh_ensitr_t trg_pos, src_pos; /* If we aren't using islands, assume everything goes together. */ if (!island_marks) @@ -101,7 +101,7 @@ int in_same_island(const struct object_id *trg_oid, const struct object_id *src_ * against anything -- it's not an important object */ trg_pos = kh_get_oid_map(island_marks, *trg_oid); - if (trg_pos >= kh_end(island_marks)) + if (kh_ens_is_end(trg_pos)) return 1; /* @@ -109,28 +109,28 @@ int in_same_island(const struct object_id *trg_oid, const struct object_id *src_ * we don't want to base any deltas on it! */ src_pos = kh_get_oid_map(island_marks, *src_oid); - if (src_pos >= kh_end(island_marks)) + if (kh_ens_is_end(src_pos)) return 0; - return island_bitmap_is_subset(kh_value(island_marks, trg_pos), - kh_value(island_marks, src_pos)); + return island_bitmap_is_subset(kh_ens_val(island_marks, trg_pos), + kh_ens_val(island_marks, src_pos)); } int island_delta_cmp(const struct object_id *a, const struct object_id *b) { - khiter_t a_pos, b_pos; + kh_ensitr_t a_pos, b_pos; struct island_bitmap *a_bitmap = NULL, *b_bitmap = NULL; if (!island_marks) return 0; a_pos = kh_get_oid_map(island_marks, *a); - if (a_pos < kh_end(island_marks)) - a_bitmap = kh_value(island_marks, a_pos); + if (!kh_ens_is_end(a_pos)) + a_bitmap = kh_ens_val(island_marks, a_pos); b_pos = kh_get_oid_map(island_marks, *b); - if (b_pos < kh_end(island_marks)) - b_bitmap = kh_value(island_marks, b_pos); + if (!kh_ens_is_end(b_pos)) + b_bitmap = kh_ens_val(island_marks, b_pos); if (a_bitmap) { if (!b_bitmap || !island_bitmap_is_subset(a_bitmap, b_bitmap)) @@ -146,20 +146,20 @@ int island_delta_cmp(const struct object_id *a, const struct object_id *b) static struct island_bitmap *create_or_get_island_marks(struct object *obj) { - khiter_t pos; + kh_ensitr_t pos; int hash_ret; pos = kh_put_oid_map(island_marks, obj->oid, &hash_ret); if (hash_ret) - kh_value(island_marks, pos) = island_bitmap_new(NULL); + kh_ens_val(island_marks, pos) = island_bitmap_new(NULL); - return kh_value(island_marks, pos); + return kh_ens_val(island_marks, pos); } static void set_island_marks(struct object *obj, struct island_bitmap *marks) { struct island_bitmap *b; - khiter_t pos; + kh_ensitr_t pos; int hash_ret; pos = kh_put_oid_map(island_marks, obj->oid, &hash_ret); @@ -169,7 +169,7 @@ static void set_island_marks(struct object *obj, struct island_bitmap *marks) * parent. */ marks->refcount++; - kh_value(island_marks, pos) = marks; + kh_ens_val(island_marks, pos) = marks; return; } @@ -177,10 +177,10 @@ static void set_island_marks(struct object *obj, struct island_bitmap *marks) * We do have it. Make sure we split any copy-on-write before * updating. */ - b = kh_value(island_marks, pos); + b = kh_ens_val(island_marks, pos); if (b->refcount > 1) { b->refcount--; - b = kh_value(island_marks, pos) = island_bitmap_new(b); + b = kh_ens_val(island_marks, pos) = island_bitmap_new(b); } island_bitmap_or(b, marks); } @@ -272,13 +272,13 @@ void resolve_tree_islands(struct repository *r, struct tree *tree; struct tree_desc desc; struct name_entry entry; - khiter_t pos; + kh_ensitr_t pos; pos = kh_get_oid_map(island_marks, ent->idx.oid); - if (pos >= kh_end(island_marks)) + if (kh_ens_is_end(pos)) continue; - root_marks = kh_value(island_marks, pos); + root_marks = kh_ens_val(island_marks, pos); tree = lookup_tree(r, &ent->idx.oid); if (!tree || parse_tree(tree) < 0) @@ -499,11 +499,11 @@ void load_delta_islands(struct repository *r, int progress) void propagate_island_marks(struct commit *commit) { - khiter_t pos = kh_get_oid_map(island_marks, commit->object.oid); + kh_ensitr_t pos = kh_get_oid_map(island_marks, commit->object.oid); - if (pos < kh_end(island_marks)) { + if (!kh_ens_is_end(pos)) { struct commit_list *p; - struct island_bitmap *root_marks = kh_value(island_marks, pos); + struct island_bitmap *root_marks = kh_ens_val(island_marks, pos); repo_parse_commit(the_repository, commit); set_island_marks(&repo_get_commit_tree(the_repository, commit)->object, @@ -518,7 +518,7 @@ void free_island_marks(void) struct island_bitmap *bitmap; if (island_marks) { - kh_foreach_value(island_marks, bitmap, { + kh_ens_foreach_value(island_marks, bitmap, { if (!--bitmap->refcount) free(bitmap); }); @@ -538,12 +538,12 @@ int compute_pack_layers(struct packing_data *to_pack) for (i = 0; i < to_pack->nr_objects; ++i) { struct object_entry *entry = &to_pack->objects[i]; - khiter_t pos = kh_get_oid_map(island_marks, entry->idx.oid); + kh_ensitr_t pos = kh_get_oid_map(island_marks, entry->idx.oid); oe_set_layer(to_pack, entry, 1); - if (pos < kh_end(island_marks)) { - struct island_bitmap *bitmap = kh_value(island_marks, pos); + if (!kh_ens_is_end(pos)) { + struct island_bitmap *bitmap = kh_ens_val(island_marks, pos); if (island_bitmap_get(bitmap, island_counter_core)) oe_set_layer(to_pack, entry, 0); diff --git a/fsck.c b/fsck.c index 8ded0a473a..4c67e1e64c 100644 --- a/fsck.c +++ b/fsck.c @@ -266,13 +266,13 @@ void fsck_enable_object_names(struct fsck_options *options) const char *fsck_get_object_name(struct fsck_options *options, const struct object_id *oid) { - khiter_t pos; + kh_ensitr_t pos; if (!options->object_names) return NULL; pos = kh_get_oid_map(options->object_names, *oid); - if (pos >= kh_end(options->object_names)) + if (kh_ens_is_end(pos)) return NULL; - return kh_value(options->object_names, pos); + return kh_ens_val(options->object_names, pos); } void fsck_put_object_name(struct fsck_options *options, @@ -281,7 +281,7 @@ void fsck_put_object_name(struct fsck_options *options, { va_list ap; struct strbuf buf = STRBUF_INIT; - khiter_t pos; + kh_ensitr_t pos; int hashret; if (!options->object_names) @@ -292,7 +292,7 @@ void fsck_put_object_name(struct fsck_options *options, return; va_start(ap, fmt); strbuf_vaddf(&buf, fmt, ap); - kh_value(options->object_names, pos) = strbuf_detach(&buf, NULL); + kh_ens_val(options->object_names, pos) = strbuf_detach(&buf, NULL); va_end(ap); } diff --git a/khashl.h b/khashl.h index a1eca0f3fd..fc905e280f 100644 --- a/khashl.h +++ b/khashl.h @@ -245,7 +245,7 @@ typedef struct { khint64_t count:54, bits:8; \ HType##_sub *sub; \ } HType; \ - SCOPE HType *prefix##_init_sub(HType *g, size_t bits) { \ + SCOPE HType *prefix##_init_bits(HType *g, size_t bits) { \ g->bits = bits; \ g->sub = (HType##_sub*)kcalloc(1U<sub)); \ return g; \ @@ -253,7 +253,7 @@ typedef struct { SCOPE HType *prefix##_init(void) { \ HType *g; \ g = (HType*)kcalloc(1, sizeof(*g)); \ - return prefix##_init_sub(g, 0); /* unsure about default */ \ + return prefix##_init_bits(g, 6); /* unsure about default */ \ } \ SCOPE void prefix##_release(HType *g) { \ int t; \ diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 990a9498d7..bbf2090c46 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -465,7 +465,7 @@ static int fill_bitmap_commit(struct bb_commit *ent, static void store_selected(struct bb_commit *ent, struct commit *commit) { struct bitmapped_commit *stored = &writer.selected[ent->idx]; - khiter_t hash_pos; + kh_ensitr_t hash_pos; int hash_ret; stored->bitmap = bitmap_to_ewah(ent->bitmap); @@ -474,7 +474,7 @@ static void store_selected(struct bb_commit *ent, struct commit *commit) if (hash_ret == 0) die("Duplicate entry when writing index: %s", oid_to_hex(&commit->object.oid)); - kh_value(writer.bitmaps, hash_pos) = stored; + kh_ens_val(writer.bitmaps, hash_pos) = stored; } int bitmap_writer_build(struct packing_data *to_pack) diff --git a/pack-bitmap.c b/pack-bitmap.c index 2baeabacee..68cd893dee 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -214,7 +214,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, int flags) { struct stored_bitmap *stored; - khiter_t hash_pos; + kh_ensitr_t hash_pos; int ret; stored = xmalloc(sizeof(struct stored_bitmap)); @@ -235,7 +235,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, return NULL; } - kh_value(index->bitmaps, hash_pos) = stored; + kh_ens_val(index->bitmaps, hash_pos) = stored; return stored; } @@ -721,7 +721,7 @@ static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_ static size_t xor_items_nr = 0, xor_items_alloc = 0; static int is_corrupt = 0; int xor_flags; - khiter_t hash_pos; + kh_ensitr_t hash_pos; struct bitmap_lookup_table_xor_item *xor_item; if (is_corrupt) @@ -766,8 +766,8 @@ static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_ * has already been stored. So, assign this stored bitmap * to the xor_bitmap. */ - if (hash_pos < kh_end(bitmap_git->bitmaps) && - (xor_bitmap = kh_value(bitmap_git->bitmaps, hash_pos))) + if (!kh_ens_is_end(hash_pos) && + (xor_bitmap = kh_ens_val(bitmap_git->bitmaps, hash_pos))) break; xor_items_nr++; xor_row = triplet.xor_row; @@ -841,9 +841,9 @@ static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_ struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, struct commit *commit) { - khiter_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps, + kh_ensitr_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps, commit->object.oid); - if (hash_pos >= kh_end(bitmap_git->bitmaps)) { + if (kh_ens_is_end(hash_pos)) { struct stored_bitmap *bitmap = NULL; if (!bitmap_git->table_lookup) return NULL; @@ -855,17 +855,17 @@ struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, return NULL; return lookup_stored_bitmap(bitmap); } - return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos)); + return lookup_stored_bitmap(kh_ens_val(bitmap_git->bitmaps, hash_pos)); } static inline int bitmap_position_extended(struct bitmap_index *bitmap_git, const struct object_id *oid) { kh_oid_pos_t *positions = bitmap_git->ext_index.positions; - khiter_t pos = kh_get_oid_pos(positions, *oid); + kh_ensitr_t pos = kh_get_oid_pos(positions, *oid); - if (pos < kh_end(positions)) { - int bitmap_pos = kh_value(positions, pos); + if (!kh_ens_is_end(pos)) { + int bitmap_pos = kh_ens_val(positions, pos); return bitmap_pos + bitmap_num_objects(bitmap_git); } @@ -913,7 +913,7 @@ static int ext_index_add_object(struct bitmap_index *bitmap_git, { struct eindex *eindex = &bitmap_git->ext_index; - khiter_t hash_pos; + kh_ensitr_t hash_pos; int hash_ret; int bitmap_pos; @@ -928,10 +928,10 @@ static int ext_index_add_object(struct bitmap_index *bitmap_git, bitmap_pos = eindex->count; eindex->objects[eindex->count] = object; eindex->hashes[eindex->count] = pack_name_hash(name); - kh_value(eindex->positions, hash_pos) = bitmap_pos; + kh_ens_val(eindex->positions, hash_pos) = bitmap_pos; eindex->count++; } else { - bitmap_pos = kh_value(eindex->positions, hash_pos); + bitmap_pos = kh_ens_val(eindex->positions, hash_pos); } return bitmap_pos + bitmap_num_objects(bitmap_git); @@ -2361,7 +2361,7 @@ int test_bitmap_commits(struct repository *r) die(_("failed to load bitmap indexes")); } - kh_foreach(bitmap_git->bitmaps, oid, value, { + kh_ens_foreach(bitmap_git->bitmaps, oid, value, { printf_ln("%s", oid_to_hex(&oid)); }); @@ -2479,7 +2479,7 @@ void free_bitmap_index(struct bitmap_index *b) ewah_pool_free(b->tags); if (b->bitmaps) { struct stored_bitmap *sb; - kh_foreach_value(b->bitmaps, sb, { + kh_ens_foreach_value(b->bitmaps, sb, { ewah_pool_free(sb->root); free(sb); });