dumping ground for random patches and texts
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] WIP
Date: Sat, 15 Jul 2017 01:42:29 +0000	[thread overview]
Message-ID: <20170715014229.19791-1-e@80x24.org> (raw)

---
 Makefile            |   2 +
 cbm_ref.c           | 438 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 cbm_ref.h           |  16 ++
 t/helper/.gitignore |   1 +
 t/helper/test-cbm.c |  45 ++++++
 5 files changed, 502 insertions(+)
 create mode 100644 cbm_ref.c
 create mode 100644 cbm_ref.h
 create mode 100644 t/helper/test-cbm.c

diff --git a/Makefile b/Makefile
index ba4359ef8d..8e60926d4d 100644
--- a/Makefile
+++ b/Makefile
@@ -633,6 +633,7 @@ X =
 
 PROGRAMS += $(patsubst %.o,git-%$X,$(PROGRAM_OBJS))
 
+TEST_PROGRAMS_NEED_X += test-cbm
 TEST_PROGRAMS_NEED_X += test-chmtime
 TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-config
@@ -748,6 +749,7 @@ LIB_OBJS += branch.o
 LIB_OBJS += bulk-checkin.o
 LIB_OBJS += bundle.o
 LIB_OBJS += cache-tree.o
+LIB_OBJS += cbm_ref.o
 LIB_OBJS += color.o
 LIB_OBJS += column.o
 LIB_OBJS += combine-diff.o
diff --git a/cbm_ref.c b/cbm_ref.c
new file mode 100644
index 0000000000..c2f2dcacb1
--- /dev/null
+++ b/cbm_ref.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) Eric Wong <e@80x24.org>
+ * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ *
+ * Byte ordering: Network byte order on filesystem, all in-memory
+ * structures are in native byte order
+ *
+ */
+#include "git-compat-util.h"
+#include "cache.h"
+#include "cbm_ref.h"
+
+/*
+ * align blocks up to the nearest 16 bytes boundary,
+ * it should give mmap compatibility for all 64-bit systems
+ */
+#define CBM_ALIGN 16
+
+/* don't penalize the majority of users who will never need >= 4GB for refs */
+typedef uint32_t cbm_off_t;
+#define CBM_SIZE_MAX UINT32_MAX
+
+/* like "struct list_head" in list.h, but "pointers" are file offsets */
+struct flist_head {
+	cbm_off_t next, prev;
+};
+
+/* the first 36 bytes of the file contains this header: */
+struct cbm_ref_tree {
+	uint8_t magic_bits[16]; /* version, etc .. */
+	struct flist_head free_list;
+	cbm_off_t root; /* 0 == empty */
+};
+/* ... the rest of the file is just made of several cbm_ref, one per ref: */
+struct cbm_ref {
+	uint16_t total_size; /* includes final '\0' in name[] */
+	uint16_t name_byte;
+	uint16_t oid_byte;
+	uint8_t name_otherbits;
+	uint8_t oid_otherbits;
+
+	/* "pointers" are file offsets */
+	cbm_off_t name_child[2];
+	cbm_off_t oid_child[2];
+	union {
+		struct flist_head free_space;
+		struct flist_head oid_alias;
+	} as;
+	struct object_id oid;
+	char name[FLEX_ARRAY];
+};
+
+/* open file, this struct in memory only (malloc-ed) */
+struct cbm {
+	int fd;
+	int flags;
+	/*
+	 * We can add mmap support, later:
+	char *mmptr;
+	size_t mmsize;
+	*/
+	struct cbm_ref_tree t;
+};
+
+static inline size_t size_align(size_t size)
+{
+	return ((size + (CBM_ALIGN - 1)) & ~(CBM_ALIGN - 1));
+}
+
+static cbm_off_t ntoh_off(cbm_off_t off)
+{
+	return sizeof(off) == sizeof(uint32_t)  ? ntohl(off) : ntohll(off);
+}
+
+static cbm_off_t hton_off(cbm_off_t off)
+{
+	return sizeof(off) == sizeof(uint32_t)  ? htonl(off) : htonll(off);
+}
+
+static void pwrite_or_die(int fd, void *buf, size_t len, off_t off)
+{
+	ssize_t w;
+again:
+	w = pwrite(fd, buf, len, off);
+	if (w < 0) {
+		if (errno == EINTR) goto again;
+		die_errno("pwrite failed");
+	}
+	if (w != len)
+		die("short pwrite: %ld < %lu", (long)w, (unsigned long)len);
+}
+
+static void ntoh_flist(struct flist_head *head)
+{
+	head->next = ntoh_off(head->next);
+	head->prev = ntoh_off(head->prev);
+}
+
+static void hton_flist(struct flist_head *head)
+{
+	head->next = hton_off(head->next);
+	head->prev = hton_off(head->prev);
+}
+
+static void xpread_in_full(int fd, void *ptr, size_t len, off_t off)
+{
+	ssize_t r = pread_in_full(fd, ptr, len, off);
+
+	if (r < 0)
+		die_errno("pread failed @ %"PRIuMAX"\n", (uintmax_t)off);
+	if (r != (ssize_t)len)
+		die("pread short %ld/%"PRIuMAX" @ %"PRIuMAX"\n",
+			(long)r, (uintmax_t)len, (uintmax_t)off);
+}
+
+static cbm_off_t ptr_read(const struct cbm *cbm, cbm_off_t off)
+{
+	cbm_off_t ptr;
+	xpread_in_full(cbm->fd, &ptr, sizeof(ptr), off);
+
+	return ntoh_off(ptr);
+}
+
+static void ptr_write(const struct cbm *cbm, cbm_off_t off, cbm_off_t val)
+{
+	cbm_off_t tmp = hton_off(val);
+
+	pwrite_or_die(cbm->fd, &tmp, sizeof(tmp), off);
+}
+
+#define FLIST_INIT(type, ptr, MEMBER) \
+	flist_init(&((ptr)->MEMBER), offsetof(type,MEMBER))
+
+static void flist_init(struct flist_head *head, cbm_off_t base)
+{
+	head->next = head->prev = base;
+}
+
+static void ntoh_cbm_ref(struct cbm_ref *ent)
+{
+	ent->total_size = ntohs(ent->total_size);
+	ent->name_byte = ntohs(ent->name_byte);
+	ent->name_child[0] = ntoh_off(ent->name_child[0]);
+	ent->name_child[1] = ntoh_off(ent->name_child[1]);
+	ent->oid_byte = ntohs(ent->oid_byte);
+	ent->oid_child[0] = ntoh_off(ent->oid_child[0]);
+	ent->oid_child[1] = ntoh_off(ent->oid_child[1]);
+	ntoh_flist(&ent->as.oid_alias);
+}
+
+static void hton_cbm_ref(struct cbm_ref *ent)
+{
+	ent->total_size = htons(ent->total_size);
+	ent->name_byte = htons(ent->name_byte);
+	ent->name_child[0] = hton_off(ent->name_child[0]);
+	ent->name_child[1] = hton_off(ent->name_child[1]);
+	ent->oid_byte = htons(ent->oid_byte);
+	ent->oid_child[0] = hton_off(ent->oid_child[0]);
+	ent->oid_child[1] = hton_off(ent->oid_child[1]);
+	hton_flist(&ent->as.oid_alias);
+}
+
+/* loads a cbm_ref into memory from a given FS offset */
+static void
+cbm_ref_load(const struct cbm *cbm, struct cbm_ref **ent, cbm_off_t off)
+{
+	size_t size = size_align(sizeof(struct cbm_ref));
+	uint16_t total_size;
+	struct cbm_ref *node;
+
+	node = *ent = xrealloc(*ent, size);
+	xpread_in_full(cbm->fd, node, size, off);
+
+	total_size = ntohs(node->total_size);
+	if (total_size > size) {
+		size = total_size;
+		*ent = xrealloc(*ent, size);
+		xpread_in_full(cbm->fd, *ent, size, off);
+	}
+	ntoh_cbm_ref(*ent);
+}
+
+static void
+cbm_ref_dump(struct cbm *cbm, const struct cbm_ref *ent, cbm_off_t off)
+{
+	struct cbm_ref *tmp = alloca(ent->total_size);
+
+	memcpy(tmp, ent, ent->total_size);
+	hton_cbm_ref(tmp);
+	pwrite_or_die(cbm->fd, tmp, ent->total_size, off);
+}
+
+static cbm_off_t
+nearest_by_name(const struct cbm *cbm, struct cbm_ref **ent,
+		const char *name, cbm_off_t p)
+{
+	const uint8_t *const ubytes = (const uint8_t *)name;
+	const size_t name_len = strlen(name);
+
+	while (1 & p) {
+		uint8_t c = 0;
+		int direction;
+		struct cbm_ref *cur;
+
+		cbm_ref_load(cbm, ent, p - 1);
+		cur = *ent;
+
+		if (cur->name_byte < name_len)
+			c = ubytes[cur->name_byte];
+		direction = (1 + (cur->name_otherbits | c)) >> 8;
+		p = cur->name_child[direction];
+	}
+	return p;
+}
+
+/* returns true and populates oid if found, false if not */
+int cbm_fetch(const struct cbm *cbm, const char *name, struct object_id *oid)
+{
+	cbm_off_t p = cbm->t.root;
+	struct cbm_ref *ent = NULL;
+	int ret;
+
+	if (!p)
+		return 0;
+
+	p = nearest_by_name(cbm, &ent, name, p);
+	cbm_ref_load(cbm, &ent, p);
+	ret = 0 == strcmp(name, ent->name);
+	if (ret)
+		oidcpy(oid, &ent->oid);
+	free(ent);
+	return ret;
+}
+
+/* returns an offset where ent->total_size can be safely written */
+static cbm_off_t
+cbm_alloc_begin(struct cbm *cbm, const struct cbm_ref *ent)
+{
+	struct stat sb;
+	/* TODO: scan free_list */
+
+	if (fstat(cbm->fd, &sb) < 0)
+		die_errno("fstat failed");
+
+	if (sb.st_size == 0)
+		sb.st_size = sizeof(cbm->t);
+	else if (sb.st_size < sizeof(cbm->t))
+		die("cbm size mismatch %"PRIuMAX, sb.st_size);
+
+	if (sb.st_size > CBM_SIZE_MAX || sb.st_size < 0)
+		die("size overflow");
+
+	return (cbm_off_t)sb.st_size;
+}
+
+int cbm_set(struct cbm *cbm, const char *name, const struct object_id *oid)
+{
+	const uint8_t *const ubytes = (const uint8_t *)name;
+	const size_t name_len = strlen(name);
+	cbm_off_t p = cbm->t.root;
+	struct cbm_ref *new_node, *cur = NULL;
+	uint32_t new_byte;
+	uint32_t new_otherbits;
+	uint32_t new_direction;
+	uint8_t c;
+	size_t size = size_align(name_len + 1 + sizeof(*new_node));
+	size_t wherep;
+	const uint8_t *cur_ubytes;
+	cbm_off_t new_off;
+
+	if (size > UINT16_MAX) {
+		warning("too long: %s %"PRIuMAX, name, (uintmax_t)size);
+		return 0;
+	}
+
+	if (!p) {
+		new_node = alloca(size);
+		memset(new_node, 0, sizeof(*new_node));
+		new_node->total_size = (uint16_t)size;
+		memcpy(new_node->name, name, name_len + 1);
+		oidcpy(&new_node->oid, oid);
+
+		cbm->t.root = cbm_alloc_begin(cbm, new_node);
+		cbm_ref_dump(cbm, new_node, cbm->t.root);
+
+		return 2;
+	}
+
+	p = nearest_by_name(cbm, &cur, name, p);
+	if (!cur)
+		cbm_ref_load(cbm, &cur, p);
+
+	fprintf(stderr, "nearest :%u %p %s\n", p, cur, cur->name);
+	cur_ubytes = (void *)cur->name;
+	for (new_byte = 0; new_byte < name_len; new_byte++) {
+		if (cur_ubytes[new_byte] != ubytes[new_byte]) {
+			new_otherbits = cur_ubytes[new_byte] ^ ubytes[new_byte];
+			goto different_byte_found;
+		}
+	}
+
+	if (cur_ubytes[new_byte] != 0) {
+		new_otherbits = cur_ubytes[new_byte];
+		goto different_byte_found;
+	}
+
+	/* overwrite existing ref */
+	fprintf(stderr, "clobber\n");
+	assert(!strcmp(cur->name, name));
+	if (oidcmp(&cur->oid, oid)) {
+		oidcpy(&cur->oid, oid);
+		cbm_ref_dump(cbm, cur, p);
+	}
+	free(cur);
+
+	return 1;
+
+different_byte_found:
+
+	new_otherbits |= new_otherbits >> 1;
+	new_otherbits |= new_otherbits >> 2;
+	new_otherbits |= new_otherbits >> 4;
+	new_otherbits = (new_otherbits & ~(new_otherbits >> 1)) ^ 255;
+
+	c = cur_ubytes[new_byte];
+	new_direction = (1 + (new_otherbits | c)) >> 8;
+
+	new_node = alloca(size);
+	memset(new_node, 0, size);
+	new_node->total_size = (uint16_t)size;
+	memcpy(new_node->name, name, name_len + 1);
+	oidcpy(&new_node->oid, oid);
+
+	cbm_flush(cbm, 0); /* ensure cbm->t.root is synchronized begin: */
+	new_off = cbm_alloc_begin(cbm, new_node);
+
+	new_node->name_byte = new_byte;
+	new_node->name_otherbits = new_otherbits;
+	new_node->name_child[1 - new_direction] = new_off; /* point to self */
+
+	wherep = offsetof(struct cbm_ref_tree, root);
+	p = ptr_read(cbm, wherep);
+	while (1 & p) {
+		int direction;
+
+		p--;
+		cbm_ref_load(cbm, &cur, p);
+
+		if (cur->name_byte > new_byte)
+			break;
+		if (cur->name_byte == new_byte &&
+				cur->name_otherbits > new_otherbits)
+			break;
+		c = 0;
+		if (cur->name_byte < name_len)
+			c = ubytes[cur->name_byte];
+
+		fprintf(stderr, "off: %zu\n", offsetof(struct cbm_ref, name_child));
+		wherep = p + offsetof(struct cbm_ref, name_child);
+		direction = (1 + (cur->name_otherbits | c)) >> 8;
+		if (direction)
+			wherep += sizeof(cbm_off_t);
+		p = ptr_read(cbm, wherep);
+	}
+
+	free(cur);
+
+	new_node->name_child[new_direction] = ptr_read(cbm, wherep);
+	cbm_ref_dump(cbm, new_node, new_off);
+
+	if (wherep == offsetof(struct cbm_ref_tree, root))
+		cbm->t.root = new_off + 1; /* to be flushed, later */
+	else
+		ptr_write(cbm, wherep, new_off + 1);
+
+	return 2;
+}
+
+void cbm_flush(struct cbm *cbm, unsigned int flush_flags)
+{
+	struct cbm_ref_tree t = cbm->t;
+
+	hton_flist(&t.free_list);
+	t.root = hton_off(t.root);
+	pwrite_or_die(cbm->fd, &t, sizeof(t), 0);
+
+	if (flush_flags & CBM_FSYNC)
+		fsync(cbm->fd);
+}
+
+struct cbm *cbm_open(const char *path, int flags)
+{
+	struct cbm *cbm;
+	ssize_t n;
+	int fd = xopen(path, flags, 0666);
+
+	if (fd < 0)
+		return NULL;
+#if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC)
+	{
+		int flags = fcntl(fd, F_GETFD);
+		if (flags == -1)
+			die_errno("F_GETFD failed");
+		if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == -1)
+			die_errno("F_SETFD failed");
+	}
+#endif
+
+	cbm = xmalloc(sizeof(*cbm));
+	cbm->fd = fd;
+	cbm->flags = flags;
+
+	n = pread_in_full(fd, &cbm->t, sizeof(cbm->t), 0);
+
+	if (n == 0) {
+		cbm->t.root = 0;
+		FLIST_INIT(struct cbm_ref_tree, &cbm->t, free_list);
+	} else if (n == sizeof(cbm->t)) {
+		cbm->t.root = ntoh_off(cbm->t.root);
+		fprintf(stderr, "root: %u\n", cbm->t.root);
+		ntoh_flist(&cbm->t.free_list);
+	} else {
+		die("unexpected cbm size: %ld", n);
+	}
+
+	return cbm;
+}
+
+void cbm_close(struct cbm *cbm, unsigned int close_flags)
+{
+	if (cbm->flags & O_RDWR)
+		cbm_flush(cbm, close_flags);
+	else if (close_flags & CBM_FSYNC)
+		fsync(cbm->fd);
+
+	close(cbm->fd);
+	free(cbm);
+}
diff --git a/cbm_ref.h b/cbm_ref.h
new file mode 100644
index 0000000000..1ecc65052c
--- /dev/null
+++ b/cbm_ref.h
@@ -0,0 +1,16 @@
+#ifndef CBM_REF_H
+#define CBM_REF_H
+#include "cache.h"
+struct cbm;
+
+#define CBM_FSYNC (0x1u)
+
+struct cbm *cbm_open(const char *path, int flags);
+void cbm_flush(struct cbm *cbm, unsigned int flush_flags);
+void cbm_close(struct cbm *, unsigned int flags);
+
+int cbm_fetch(const struct cbm *, const char *name, struct object_id *);
+int cbm_delete(struct cbm *, const char *name, struct object_id *);
+int cbm_set(struct cbm *, const char *name, const struct object_id *);
+
+#endif /* CBM_REF_H */
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index 721650256e..71648cc187 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -1,3 +1,4 @@
+/test-cbm
 /test-chmtime
 /test-ctype
 /test-config
diff --git a/t/helper/test-cbm.c b/t/helper/test-cbm.c
new file mode 100644
index 0000000000..f77603ca8f
--- /dev/null
+++ b/t/helper/test-cbm.c
@@ -0,0 +1,45 @@
+#include "cbm_ref.h"
+#include "git-compat-util.h"
+
+static const char usage_str[] = "FILE [REFNAME [HEXVALUE]]";
+
+int cmd_main(int argc, const char **argv)
+{
+	int ret = 1;
+
+	if (argc <= 0) {
+		fprintf(stderr, "usage: %s %s\n", argv[0], usage_str);
+	} else if (argc == 2) {
+		fprintf(stderr, "dump todo\n");
+	} else if (argc >= 3 && argc <= 4) {
+		struct cbm *cbm = cbm_open(argv[1], O_RDWR|O_CREAT);
+		struct object_id oid;
+
+		if (!cbm)
+			die_errno("failed to open: %s", argv[1]);
+
+		if (argc == 3) {
+			if (cbm_fetch(cbm, argv[2], &oid)) {
+				printf("%s\n", oid_to_hex(&oid));
+				ret = 0;
+			} else {
+				fprintf(stderr, "not found\n");
+			}
+		} else if (argc == 4) {
+			const char *p = argv[3];
+
+			if (parse_oid_hex(p, &oid, &p))
+				die("invalid hex: %s", argv[3]);
+
+			switch (cbm_set(cbm, argv[2], &oid)) {
+			case 1: fprintf(stderr, "overwritten\n"); break;
+			case 2: fprintf(stderr, "new\n"); break;
+			default: die("cbm_set error");
+			}
+
+			ret = 0;
+		}
+		cbm_close(cbm, 0);
+	}
+	return ret;
+}
-- 
EW


             reply	other threads:[~2017-07-15  1:42 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-15  1:42 Eric Wong [this message]
  -- strict thread matches above, loose matches on Subject: below --
2021-10-27 20:16 [PATCH] wip Eric Wong
2021-06-05 19:58 Eric Wong
2021-04-05  7:42 Eric Wong
2021-03-08  7:11 Eric Wong
2021-01-21  4:24 [PATCH] WIP Eric Wong
2021-01-03 22:57 [PATCH] wip Eric Wong
2020-12-27 11:36 [PATCH] WIP Eric Wong
2020-11-15  7:35 [PATCH] wip Eric Wong
2020-04-23  4:27 Eric Wong
2020-04-20  7:14 Eric Wong
2020-01-13  9:24 [PATCH] WIP Eric Wong
2019-05-11 22:55 Eric Wong
2019-01-02  9:21 [PATCH] wip Eric Wong
2018-07-06 21:31 Eric Wong
2018-06-24 11:55 Eric Wong
2018-06-24  8:39 Eric Wong
2017-04-12 20:17 Eric Wong
2017-04-05 18:40 Eric Wong
2016-08-23 20:07 Eric Wong
2016-08-18  2:16 Eric Wong
2016-06-26  3:46 Eric Wong
2015-12-22  0:15 Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170715014229.19791-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=spew@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).