From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] WIP
Date: Sat, 15 Jul 2017 01:42:29 +0000 [thread overview]
Message-ID: <20170715014229.19791-1-e@80x24.org> (raw)
---
Makefile | 2 +
cbm_ref.c | 438 ++++++++++++++++++++++++++++++++++++++++++++++++++++
cbm_ref.h | 16 ++
t/helper/.gitignore | 1 +
t/helper/test-cbm.c | 45 ++++++
5 files changed, 502 insertions(+)
create mode 100644 cbm_ref.c
create mode 100644 cbm_ref.h
create mode 100644 t/helper/test-cbm.c
diff --git a/Makefile b/Makefile
index ba4359ef8d..8e60926d4d 100644
--- a/Makefile
+++ b/Makefile
@@ -633,6 +633,7 @@ X =
PROGRAMS += $(patsubst %.o,git-%$X,$(PROGRAM_OBJS))
+TEST_PROGRAMS_NEED_X += test-cbm
TEST_PROGRAMS_NEED_X += test-chmtime
TEST_PROGRAMS_NEED_X += test-ctype
TEST_PROGRAMS_NEED_X += test-config
@@ -748,6 +749,7 @@ LIB_OBJS += branch.o
LIB_OBJS += bulk-checkin.o
LIB_OBJS += bundle.o
LIB_OBJS += cache-tree.o
+LIB_OBJS += cbm_ref.o
LIB_OBJS += color.o
LIB_OBJS += column.o
LIB_OBJS += combine-diff.o
diff --git a/cbm_ref.c b/cbm_ref.c
new file mode 100644
index 0000000000..c2f2dcacb1
--- /dev/null
+++ b/cbm_ref.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) Eric Wong <e@80x24.org>
+ * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ *
+ * Byte ordering: Network byte order on filesystem, all in-memory
+ * structures are in native byte order
+ *
+ */
+#include "git-compat-util.h"
+#include "cache.h"
+#include "cbm_ref.h"
+
+/*
+ * align blocks up to the nearest 16 bytes boundary,
+ * it should give mmap compatibility for all 64-bit systems
+ */
+#define CBM_ALIGN 16
+
+/* don't penalize the majority of users who will never need >= 4GB for refs */
+typedef uint32_t cbm_off_t;
+#define CBM_SIZE_MAX UINT32_MAX
+
+/* like "struct list_head" in list.h, but "pointers" are file offsets */
+struct flist_head {
+ cbm_off_t next, prev;
+};
+
+/* the first 36 bytes of the file contains this header: */
+struct cbm_ref_tree {
+ uint8_t magic_bits[16]; /* version, etc .. */
+ struct flist_head free_list;
+ cbm_off_t root; /* 0 == empty */
+};
+/* ... the rest of the file is just made of several cbm_ref, one per ref: */
+struct cbm_ref {
+ uint16_t total_size; /* includes final '\0' in name[] */
+ uint16_t name_byte;
+ uint16_t oid_byte;
+ uint8_t name_otherbits;
+ uint8_t oid_otherbits;
+
+ /* "pointers" are file offsets */
+ cbm_off_t name_child[2];
+ cbm_off_t oid_child[2];
+ union {
+ struct flist_head free_space;
+ struct flist_head oid_alias;
+ } as;
+ struct object_id oid;
+ char name[FLEX_ARRAY];
+};
+
+/* open file, this struct in memory only (malloc-ed) */
+struct cbm {
+ int fd;
+ int flags;
+ /*
+ * We can add mmap support, later:
+ char *mmptr;
+ size_t mmsize;
+ */
+ struct cbm_ref_tree t;
+};
+
+static inline size_t size_align(size_t size)
+{
+ return ((size + (CBM_ALIGN - 1)) & ~(CBM_ALIGN - 1));
+}
+
+static cbm_off_t ntoh_off(cbm_off_t off)
+{
+ return sizeof(off) == sizeof(uint32_t) ? ntohl(off) : ntohll(off);
+}
+
+static cbm_off_t hton_off(cbm_off_t off)
+{
+ return sizeof(off) == sizeof(uint32_t) ? htonl(off) : htonll(off);
+}
+
+static void pwrite_or_die(int fd, void *buf, size_t len, off_t off)
+{
+ ssize_t w;
+again:
+ w = pwrite(fd, buf, len, off);
+ if (w < 0) {
+ if (errno == EINTR) goto again;
+ die_errno("pwrite failed");
+ }
+ if (w != len)
+ die("short pwrite: %ld < %lu", (long)w, (unsigned long)len);
+}
+
+static void ntoh_flist(struct flist_head *head)
+{
+ head->next = ntoh_off(head->next);
+ head->prev = ntoh_off(head->prev);
+}
+
+static void hton_flist(struct flist_head *head)
+{
+ head->next = hton_off(head->next);
+ head->prev = hton_off(head->prev);
+}
+
+static void xpread_in_full(int fd, void *ptr, size_t len, off_t off)
+{
+ ssize_t r = pread_in_full(fd, ptr, len, off);
+
+ if (r < 0)
+ die_errno("pread failed @ %"PRIuMAX"\n", (uintmax_t)off);
+ if (r != (ssize_t)len)
+ die("pread short %ld/%"PRIuMAX" @ %"PRIuMAX"\n",
+ (long)r, (uintmax_t)len, (uintmax_t)off);
+}
+
+static cbm_off_t ptr_read(const struct cbm *cbm, cbm_off_t off)
+{
+ cbm_off_t ptr;
+ xpread_in_full(cbm->fd, &ptr, sizeof(ptr), off);
+
+ return ntoh_off(ptr);
+}
+
+static void ptr_write(const struct cbm *cbm, cbm_off_t off, cbm_off_t val)
+{
+ cbm_off_t tmp = hton_off(val);
+
+ pwrite_or_die(cbm->fd, &tmp, sizeof(tmp), off);
+}
+
+#define FLIST_INIT(type, ptr, MEMBER) \
+ flist_init(&((ptr)->MEMBER), offsetof(type,MEMBER))
+
+static void flist_init(struct flist_head *head, cbm_off_t base)
+{
+ head->next = head->prev = base;
+}
+
+static void ntoh_cbm_ref(struct cbm_ref *ent)
+{
+ ent->total_size = ntohs(ent->total_size);
+ ent->name_byte = ntohs(ent->name_byte);
+ ent->name_child[0] = ntoh_off(ent->name_child[0]);
+ ent->name_child[1] = ntoh_off(ent->name_child[1]);
+ ent->oid_byte = ntohs(ent->oid_byte);
+ ent->oid_child[0] = ntoh_off(ent->oid_child[0]);
+ ent->oid_child[1] = ntoh_off(ent->oid_child[1]);
+ ntoh_flist(&ent->as.oid_alias);
+}
+
+static void hton_cbm_ref(struct cbm_ref *ent)
+{
+ ent->total_size = htons(ent->total_size);
+ ent->name_byte = htons(ent->name_byte);
+ ent->name_child[0] = hton_off(ent->name_child[0]);
+ ent->name_child[1] = hton_off(ent->name_child[1]);
+ ent->oid_byte = htons(ent->oid_byte);
+ ent->oid_child[0] = hton_off(ent->oid_child[0]);
+ ent->oid_child[1] = hton_off(ent->oid_child[1]);
+ hton_flist(&ent->as.oid_alias);
+}
+
+/* loads a cbm_ref into memory from a given FS offset */
+static void
+cbm_ref_load(const struct cbm *cbm, struct cbm_ref **ent, cbm_off_t off)
+{
+ size_t size = size_align(sizeof(struct cbm_ref));
+ uint16_t total_size;
+ struct cbm_ref *node;
+
+ node = *ent = xrealloc(*ent, size);
+ xpread_in_full(cbm->fd, node, size, off);
+
+ total_size = ntohs(node->total_size);
+ if (total_size > size) {
+ size = total_size;
+ *ent = xrealloc(*ent, size);
+ xpread_in_full(cbm->fd, *ent, size, off);
+ }
+ ntoh_cbm_ref(*ent);
+}
+
+static void
+cbm_ref_dump(struct cbm *cbm, const struct cbm_ref *ent, cbm_off_t off)
+{
+ struct cbm_ref *tmp = alloca(ent->total_size);
+
+ memcpy(tmp, ent, ent->total_size);
+ hton_cbm_ref(tmp);
+ pwrite_or_die(cbm->fd, tmp, ent->total_size, off);
+}
+
+static cbm_off_t
+nearest_by_name(const struct cbm *cbm, struct cbm_ref **ent,
+ const char *name, cbm_off_t p)
+{
+ const uint8_t *const ubytes = (const uint8_t *)name;
+ const size_t name_len = strlen(name);
+
+ while (1 & p) {
+ uint8_t c = 0;
+ int direction;
+ struct cbm_ref *cur;
+
+ cbm_ref_load(cbm, ent, p - 1);
+ cur = *ent;
+
+ if (cur->name_byte < name_len)
+ c = ubytes[cur->name_byte];
+ direction = (1 + (cur->name_otherbits | c)) >> 8;
+ p = cur->name_child[direction];
+ }
+ return p;
+}
+
+/* returns true and populates oid if found, false if not */
+int cbm_fetch(const struct cbm *cbm, const char *name, struct object_id *oid)
+{
+ cbm_off_t p = cbm->t.root;
+ struct cbm_ref *ent = NULL;
+ int ret;
+
+ if (!p)
+ return 0;
+
+ p = nearest_by_name(cbm, &ent, name, p);
+ cbm_ref_load(cbm, &ent, p);
+ ret = 0 == strcmp(name, ent->name);
+ if (ret)
+ oidcpy(oid, &ent->oid);
+ free(ent);
+ return ret;
+}
+
+/* returns an offset where ent->total_size can be safely written */
+static cbm_off_t
+cbm_alloc_begin(struct cbm *cbm, const struct cbm_ref *ent)
+{
+ struct stat sb;
+ /* TODO: scan free_list */
+
+ if (fstat(cbm->fd, &sb) < 0)
+ die_errno("fstat failed");
+
+ if (sb.st_size == 0)
+ sb.st_size = sizeof(cbm->t);
+ else if (sb.st_size < sizeof(cbm->t))
+ die("cbm size mismatch %"PRIuMAX, sb.st_size);
+
+ if (sb.st_size > CBM_SIZE_MAX || sb.st_size < 0)
+ die("size overflow");
+
+ return (cbm_off_t)sb.st_size;
+}
+
+int cbm_set(struct cbm *cbm, const char *name, const struct object_id *oid)
+{
+ const uint8_t *const ubytes = (const uint8_t *)name;
+ const size_t name_len = strlen(name);
+ cbm_off_t p = cbm->t.root;
+ struct cbm_ref *new_node, *cur = NULL;
+ uint32_t new_byte;
+ uint32_t new_otherbits;
+ uint32_t new_direction;
+ uint8_t c;
+ size_t size = size_align(name_len + 1 + sizeof(*new_node));
+ size_t wherep;
+ const uint8_t *cur_ubytes;
+ cbm_off_t new_off;
+
+ if (size > UINT16_MAX) {
+ warning("too long: %s %"PRIuMAX, name, (uintmax_t)size);
+ return 0;
+ }
+
+ if (!p) {
+ new_node = alloca(size);
+ memset(new_node, 0, sizeof(*new_node));
+ new_node->total_size = (uint16_t)size;
+ memcpy(new_node->name, name, name_len + 1);
+ oidcpy(&new_node->oid, oid);
+
+ cbm->t.root = cbm_alloc_begin(cbm, new_node);
+ cbm_ref_dump(cbm, new_node, cbm->t.root);
+
+ return 2;
+ }
+
+ p = nearest_by_name(cbm, &cur, name, p);
+ if (!cur)
+ cbm_ref_load(cbm, &cur, p);
+
+ fprintf(stderr, "nearest :%u %p %s\n", p, cur, cur->name);
+ cur_ubytes = (void *)cur->name;
+ for (new_byte = 0; new_byte < name_len; new_byte++) {
+ if (cur_ubytes[new_byte] != ubytes[new_byte]) {
+ new_otherbits = cur_ubytes[new_byte] ^ ubytes[new_byte];
+ goto different_byte_found;
+ }
+ }
+
+ if (cur_ubytes[new_byte] != 0) {
+ new_otherbits = cur_ubytes[new_byte];
+ goto different_byte_found;
+ }
+
+ /* overwrite existing ref */
+ fprintf(stderr, "clobber\n");
+ assert(!strcmp(cur->name, name));
+ if (oidcmp(&cur->oid, oid)) {
+ oidcpy(&cur->oid, oid);
+ cbm_ref_dump(cbm, cur, p);
+ }
+ free(cur);
+
+ return 1;
+
+different_byte_found:
+
+ new_otherbits |= new_otherbits >> 1;
+ new_otherbits |= new_otherbits >> 2;
+ new_otherbits |= new_otherbits >> 4;
+ new_otherbits = (new_otherbits & ~(new_otherbits >> 1)) ^ 255;
+
+ c = cur_ubytes[new_byte];
+ new_direction = (1 + (new_otherbits | c)) >> 8;
+
+ new_node = alloca(size);
+ memset(new_node, 0, size);
+ new_node->total_size = (uint16_t)size;
+ memcpy(new_node->name, name, name_len + 1);
+ oidcpy(&new_node->oid, oid);
+
+ cbm_flush(cbm, 0); /* ensure cbm->t.root is synchronized begin: */
+ new_off = cbm_alloc_begin(cbm, new_node);
+
+ new_node->name_byte = new_byte;
+ new_node->name_otherbits = new_otherbits;
+ new_node->name_child[1 - new_direction] = new_off; /* point to self */
+
+ wherep = offsetof(struct cbm_ref_tree, root);
+ p = ptr_read(cbm, wherep);
+ while (1 & p) {
+ int direction;
+
+ p--;
+ cbm_ref_load(cbm, &cur, p);
+
+ if (cur->name_byte > new_byte)
+ break;
+ if (cur->name_byte == new_byte &&
+ cur->name_otherbits > new_otherbits)
+ break;
+ c = 0;
+ if (cur->name_byte < name_len)
+ c = ubytes[cur->name_byte];
+
+ fprintf(stderr, "off: %zu\n", offsetof(struct cbm_ref, name_child));
+ wherep = p + offsetof(struct cbm_ref, name_child);
+ direction = (1 + (cur->name_otherbits | c)) >> 8;
+ if (direction)
+ wherep += sizeof(cbm_off_t);
+ p = ptr_read(cbm, wherep);
+ }
+
+ free(cur);
+
+ new_node->name_child[new_direction] = ptr_read(cbm, wherep);
+ cbm_ref_dump(cbm, new_node, new_off);
+
+ if (wherep == offsetof(struct cbm_ref_tree, root))
+ cbm->t.root = new_off + 1; /* to be flushed, later */
+ else
+ ptr_write(cbm, wherep, new_off + 1);
+
+ return 2;
+}
+
+void cbm_flush(struct cbm *cbm, unsigned int flush_flags)
+{
+ struct cbm_ref_tree t = cbm->t;
+
+ hton_flist(&t.free_list);
+ t.root = hton_off(t.root);
+ pwrite_or_die(cbm->fd, &t, sizeof(t), 0);
+
+ if (flush_flags & CBM_FSYNC)
+ fsync(cbm->fd);
+}
+
+struct cbm *cbm_open(const char *path, int flags)
+{
+ struct cbm *cbm;
+ ssize_t n;
+ int fd = xopen(path, flags, 0666);
+
+ if (fd < 0)
+ return NULL;
+#if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC)
+ {
+ int flags = fcntl(fd, F_GETFD);
+ if (flags == -1)
+ die_errno("F_GETFD failed");
+ if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == -1)
+ die_errno("F_SETFD failed");
+ }
+#endif
+
+ cbm = xmalloc(sizeof(*cbm));
+ cbm->fd = fd;
+ cbm->flags = flags;
+
+ n = pread_in_full(fd, &cbm->t, sizeof(cbm->t), 0);
+
+ if (n == 0) {
+ cbm->t.root = 0;
+ FLIST_INIT(struct cbm_ref_tree, &cbm->t, free_list);
+ } else if (n == sizeof(cbm->t)) {
+ cbm->t.root = ntoh_off(cbm->t.root);
+ fprintf(stderr, "root: %u\n", cbm->t.root);
+ ntoh_flist(&cbm->t.free_list);
+ } else {
+ die("unexpected cbm size: %ld", n);
+ }
+
+ return cbm;
+}
+
+void cbm_close(struct cbm *cbm, unsigned int close_flags)
+{
+ if (cbm->flags & O_RDWR)
+ cbm_flush(cbm, close_flags);
+ else if (close_flags & CBM_FSYNC)
+ fsync(cbm->fd);
+
+ close(cbm->fd);
+ free(cbm);
+}
diff --git a/cbm_ref.h b/cbm_ref.h
new file mode 100644
index 0000000000..1ecc65052c
--- /dev/null
+++ b/cbm_ref.h
@@ -0,0 +1,16 @@
+#ifndef CBM_REF_H
+#define CBM_REF_H
+#include "cache.h"
+struct cbm;
+
+#define CBM_FSYNC (0x1u)
+
+struct cbm *cbm_open(const char *path, int flags);
+void cbm_flush(struct cbm *cbm, unsigned int flush_flags);
+void cbm_close(struct cbm *, unsigned int flags);
+
+int cbm_fetch(const struct cbm *, const char *name, struct object_id *);
+int cbm_delete(struct cbm *, const char *name, struct object_id *);
+int cbm_set(struct cbm *, const char *name, const struct object_id *);
+
+#endif /* CBM_REF_H */
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index 721650256e..71648cc187 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -1,3 +1,4 @@
+/test-cbm
/test-chmtime
/test-ctype
/test-config
diff --git a/t/helper/test-cbm.c b/t/helper/test-cbm.c
new file mode 100644
index 0000000000..f77603ca8f
--- /dev/null
+++ b/t/helper/test-cbm.c
@@ -0,0 +1,45 @@
+#include "cbm_ref.h"
+#include "git-compat-util.h"
+
+static const char usage_str[] = "FILE [REFNAME [HEXVALUE]]";
+
+int cmd_main(int argc, const char **argv)
+{
+ int ret = 1;
+
+ if (argc <= 0) {
+ fprintf(stderr, "usage: %s %s\n", argv[0], usage_str);
+ } else if (argc == 2) {
+ fprintf(stderr, "dump todo\n");
+ } else if (argc >= 3 && argc <= 4) {
+ struct cbm *cbm = cbm_open(argv[1], O_RDWR|O_CREAT);
+ struct object_id oid;
+
+ if (!cbm)
+ die_errno("failed to open: %s", argv[1]);
+
+ if (argc == 3) {
+ if (cbm_fetch(cbm, argv[2], &oid)) {
+ printf("%s\n", oid_to_hex(&oid));
+ ret = 0;
+ } else {
+ fprintf(stderr, "not found\n");
+ }
+ } else if (argc == 4) {
+ const char *p = argv[3];
+
+ if (parse_oid_hex(p, &oid, &p))
+ die("invalid hex: %s", argv[3]);
+
+ switch (cbm_set(cbm, argv[2], &oid)) {
+ case 1: fprintf(stderr, "overwritten\n"); break;
+ case 2: fprintf(stderr, "new\n"); break;
+ default: die("cbm_set error");
+ }
+
+ ret = 0;
+ }
+ cbm_close(cbm, 0);
+ }
+ return ret;
+}
--
EW
next reply other threads:[~2017-07-15 1:42 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-15 1:42 Eric Wong [this message]
-- strict thread matches above, loose matches on Subject: below --
2021-10-27 20:16 [PATCH] wip Eric Wong
2021-06-05 19:58 Eric Wong
2021-04-05 7:42 Eric Wong
2021-03-08 7:11 Eric Wong
2021-01-21 4:24 [PATCH] WIP Eric Wong
2021-01-03 22:57 [PATCH] wip Eric Wong
2020-12-27 11:36 [PATCH] WIP Eric Wong
2020-11-15 7:35 [PATCH] wip Eric Wong
2020-04-23 4:27 Eric Wong
2020-04-20 7:14 Eric Wong
2020-01-13 9:24 [PATCH] WIP Eric Wong
2019-05-11 22:55 Eric Wong
2019-01-02 9:21 [PATCH] wip Eric Wong
2018-07-06 21:31 Eric Wong
2018-06-24 11:55 Eric Wong
2018-06-24 8:39 Eric Wong
2017-04-12 20:17 Eric Wong
2017-04-05 18:40 Eric Wong
2016-08-23 20:07 Eric Wong
2016-08-18 2:16 Eric Wong
2016-06-26 3:46 Eric Wong
2015-12-22 0:15 Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170715014229.19791-1-e@80x24.org \
--to=e@80x24.org \
--cc=spew@80x24.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).