All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [RFC 0/4] GSOC: prepare svndump for branch detection
@ 2012-08-20 21:57 Florian Achleitner
  2012-08-20 21:57 ` [RFC 1/4] svndump: move struct definitions to .h Florian Achleitner
  0 siblings, 1 reply; 5+ messages in thread
From: Florian Achleitner @ 2012-08-20 21:57 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, David Michael Barr, Jonathan Nieder

New Version with the following changes:
- Remove the sha1 calculation from vcs-svn.
- Instead, use marks to reference the blobs.
- Distinguish blob marks from commit marks by setting the MSB for blobs.
- fixups by Junio from fa/vcs-svn

 [RFC 1/4] svndump: move struct definitions to .h
 [RFC 2/4] vcs-svn/svndump: restructure node_ctx, rev_ctx handling
 [RFC 3/4] vcs-svn/svndump: rewrite handle_node(), begin|end_revision()
 [RFC 4/4] vcs-svn: remove repo_tree

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC 1/4] svndump: move struct definitions to .h
  2012-08-20 21:57 [RFC 0/4] GSOC: prepare svndump for branch detection Florian Achleitner
@ 2012-08-20 21:57 ` Florian Achleitner
  2012-08-20 21:57   ` [RFC 2/4] vcs-svn/svndump: restructure node_ctx, rev_ctx handling Florian Achleitner
  0 siblings, 1 reply; 5+ messages in thread
From: Florian Achleitner @ 2012-08-20 21:57 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, David Michael Barr, Jonathan Nieder

This is a preparation for adding branch detection code in separate
.c files.

External branch detection functions will need to use these.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 vcs-svn/svndump.c |   19 +++----------------
 vcs-svn/svndump.h |   19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 31d1d83..6ca94de 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -38,23 +38,10 @@
 
 static struct line_buffer input = LINE_BUFFER_INIT;
 
-static struct {
-	uint32_t action, srcRev, type;
-	off_t prop_length, text_length;
-	struct strbuf src, dst;
-	uint32_t text_delta, prop_delta;
-} node_ctx;
+static struct node_ctx_t node_ctx;
+static struct rev_ctx_t rev_ctx;
+static struct dump_ctx_t dump_ctx;
 
-static struct {
-	uint32_t revision;
-	unsigned long timestamp;
-	struct strbuf log, author, note;
-} rev_ctx;
-
-static struct {
-	uint32_t version;
-	struct strbuf uuid, url;
-} dump_ctx;
 
 static void reset_node_ctx(char *fname)
 {
diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h
index b8eb129..d545453 100644
--- a/vcs-svn/svndump.h
+++ b/vcs-svn/svndump.h
@@ -1,5 +1,6 @@
 #ifndef SVNDUMP_H_
 #define SVNDUMP_H_
+#include "cache.h"
 
 int svndump_init(const char *filename);
 int svndump_init_fd(int in_fd, int back_fd);
@@ -7,4 +8,22 @@ void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
 void svndump_deinit(void);
 void svndump_reset(void);
 
+struct node_ctx_t {
+	uint32_t action, srcRev, type;
+	off_t prop_length, text_length;
+	struct strbuf src, dst;
+	uint32_t text_delta, prop_delta;
+};
+
+struct rev_ctx_t {
+	uint32_t revision;
+	unsigned long timestamp;
+	struct strbuf log, author, note;
+};
+
+struct dump_ctx_t {
+	uint32_t version;
+	struct strbuf uuid, url;
+};
+
 #endif
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RFC 2/4] vcs-svn/svndump: restructure node_ctx, rev_ctx handling
  2012-08-20 21:57 ` [RFC 1/4] svndump: move struct definitions to .h Florian Achleitner
@ 2012-08-20 21:57   ` Florian Achleitner
  2012-08-20 21:57     ` [RFC 3/4] vcs-svn/svndump: rewrite handle_node(), begin|end_revision() Florian Achleitner
  0 siblings, 1 reply; 5+ messages in thread
From: Florian Achleitner @ 2012-08-20 21:57 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, David Michael Barr, Jonathan Nieder,
	Florian Achleitner

As a preparation for handling branches in svndumps, make rev_ctx and
node_ctx more flexible.

Add the object to work on to the arguments of reset_*_ctx() and to
handle_node() to allow for multiple *_ctx objects.

Convert the static global node_ctx to a linked list of such objects to
be able to accumulate all Node data of a revision in memory before
processing it.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 vcs-svn/svndump.c |  207 +++++++++++++++++++++++++++++++----------------------
 vcs-svn/svndump.h |    2 +
 2 files changed, 124 insertions(+), 85 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 6ca94de..385523a 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -38,42 +38,81 @@
 
 static struct line_buffer input = LINE_BUFFER_INIT;
 
-static struct node_ctx_t node_ctx;
+static struct node_ctx_t *node_ctx;
 static struct rev_ctx_t rev_ctx;
 static struct dump_ctx_t dump_ctx;
+static const char *current_ref;
 
+static struct node_ctx_t *node_list, *node_list_tail;
 
-static void reset_node_ctx(char *fname)
+static struct node_ctx_t *new_node_ctx(char *fname)
 {
-	node_ctx.type = 0;
-	node_ctx.action = NODEACT_UNKNOWN;
-	node_ctx.prop_length = -1;
-	node_ctx.text_length = -1;
-	strbuf_reset(&node_ctx.src);
-	node_ctx.srcRev = 0;
-	strbuf_reset(&node_ctx.dst);
+	struct node_ctx_t *node = xmalloc(sizeof(struct node_ctx_t));
+	trace_printf("new_node_ctx %p\n", node);
+	node->type = 0;
+	node->action = NODEACT_UNKNOWN;
+	node->prop_length = -1;
+	node->text_length = -1;
+	strbuf_init(&node->src, 4096);
+	node->srcRev = 0;
+	strbuf_init(&node->dst, 4096);
 	if (fname)
-		strbuf_addstr(&node_ctx.dst, fname);
-	node_ctx.text_delta = 0;
-	node_ctx.prop_delta = 0;
+		strbuf_addstr(&node->dst, fname);
+	node->text_delta = 0;
+	node->prop_delta = 0;
+	node->dataref = NULL;
+	node->next = NULL;
+	return node;
 }
 
-static void reset_rev_ctx(uint32_t revision)
+static void free_node_ctx(struct node_ctx_t *node)
 {
-	rev_ctx.revision = revision;
-	rev_ctx.timestamp = 0;
-	strbuf_reset(&rev_ctx.log);
-	strbuf_reset(&rev_ctx.author);
-	strbuf_reset(&rev_ctx.note);
+	trace_printf("free_node_ctx %p\n", node);
+	strbuf_release(&node->src);
+	strbuf_release(&node->dst);
+	free((char*)node->dataref);
+	free(node);
 }
 
-static void reset_dump_ctx(const char *url)
+static void free_node_list(void)
 {
-	strbuf_reset(&dump_ctx.url);
+	struct node_ctx_t *p = node_list, *n;
+	trace_printf("free_node_list head %p tail %p\n", node_list, node_list_tail);
+	while (p) {
+		n = p->next;
+		free_node_ctx(p);
+		p = n;
+	}
+	node_list = node_list_tail = NULL;
+}
+
+static void append_node_list(struct node_ctx_t *n)
+{
+	trace_printf("append_node_list %p head %p tail %p\n", n, node_list, node_list_tail);
+	if (!node_list)
+		node_list = node_list_tail = n;
+	else {
+		node_list_tail->next = n;
+		node_list_tail = n;
+	}
+}
+
+static void reset_rev_ctx(struct rev_ctx_t *rev, uint32_t revision)
+{
+	rev->revision = revision;
+	rev->timestamp = 0;
+	strbuf_reset(&rev->log);
+	strbuf_reset(&rev->author);
+	strbuf_reset(&rev->note);
+}
+
+static void reset_dump_ctx(struct dump_ctx_t *dump, const char *url)
+{
+	strbuf_reset(&dump->url);
 	if (url)
-		strbuf_addstr(&dump_ctx.url, url);
-	dump_ctx.version = 1;
-	strbuf_reset(&dump_ctx.uuid);
+		strbuf_addstr(&dump->url, url);
+	dump->version = 1;
+	strbuf_reset(&dump->uuid);
 }
 
 static void handle_property(const struct strbuf *key_buf,
@@ -121,11 +160,11 @@ static void handle_property(const struct strbuf *key_buf,
 			die("invalid dump: sets type twice");
 		}
 		if (!val) {
-			node_ctx.type = REPO_MODE_BLB;
+			node_ctx->type = REPO_MODE_BLB;
 			return;
 		}
 		*type_set = 1;
-		node_ctx.type = keylen == strlen("svn:executable") ?
+		node_ctx->type = keylen == strlen("svn:executable") ?
 				REPO_MODE_EXE :
 				REPO_MODE_LNK;
 	}
@@ -193,11 +232,11 @@ static void read_props(void)
 	}
 }
 
-static void handle_node(void)
+static void handle_node(struct node_ctx_t *node)
 {
-	const uint32_t type = node_ctx.type;
-	const int have_props = node_ctx.prop_length != -1;
-	const int have_text = node_ctx.text_length != -1;
+	const uint32_t type = node->type;
+	const int have_props = node->prop_length != -1;
+	const int have_text = node->text_length != -1;
 	/*
 	 * Old text for this node:
 	 *  NULL	- directory or bug
@@ -208,21 +247,21 @@ static void handle_node(void)
 	const char *old_data = NULL;
 	uint32_t old_mode = REPO_MODE_BLB;
 
-	if (node_ctx.action == NODEACT_DELETE) {
-		if (have_text || have_props || node_ctx.srcRev)
+	if (node->action == NODEACT_DELETE) {
+		if (have_text || have_props || node->srcRev)
 			die("invalid dump: deletion node has "
 				"copyfrom info, text, or properties");
-		repo_delete(node_ctx.dst.buf);
+		repo_delete(node->dst.buf);
 		return;
 	}
-	if (node_ctx.action == NODEACT_REPLACE) {
-		repo_delete(node_ctx.dst.buf);
-		node_ctx.action = NODEACT_ADD;
+	if (node->action == NODEACT_REPLACE) {
+		repo_delete(node->dst.buf);
+		node->action = NODEACT_ADD;
 	}
-	if (node_ctx.srcRev) {
-		repo_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf);
-		if (node_ctx.action == NODEACT_ADD)
-			node_ctx.action = NODEACT_CHANGE;
+	if (node->srcRev) {
+		repo_copy(node->srcRev, node->src.buf, node->dst.buf);
+		if (node->action == NODEACT_ADD)
+			node->action = NODEACT_CHANGE;
 	}
 	if (have_text && type == REPO_MODE_DIR)
 		die("invalid dump: directories cannot have text attached");
@@ -230,20 +269,20 @@ static void handle_node(void)
 	/*
 	 * Find old content (old_data) and decide on the new mode.
 	 */
-	if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) {
+	if (node->action == NODEACT_CHANGE && !*node->dst.buf) {
 		if (type != REPO_MODE_DIR)
 			die("invalid dump: root of tree is not a regular file");
 		old_data = NULL;
-	} else if (node_ctx.action == NODEACT_CHANGE) {
+	} else if (node->action == NODEACT_CHANGE) {
 		uint32_t mode;
-		old_data = repo_read_path(node_ctx.dst.buf, &mode);
+		old_data = repo_read_path(node->dst.buf, &mode); /* malloced buffer */
 		if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
 			die("invalid dump: cannot modify a directory into a file");
 		if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
 			die("invalid dump: cannot modify a file into a directory");
-		node_ctx.type = mode;
+		node->type = mode;
 		old_mode = mode;
-	} else if (node_ctx.action == NODEACT_ADD) {
+	} else if (node->action == NODEACT_ADD) {
 		if (type == REPO_MODE_DIR)
 			old_data = NULL;
 		else if (have_text)
@@ -258,9 +297,9 @@ static void handle_node(void)
 	 * Adjust mode to reflect properties.
 	 */
 	if (have_props) {
-		if (!node_ctx.prop_delta)
-			node_ctx.type = type;
-		if (node_ctx.prop_length)
+		if (!node->prop_delta)
+			node->type = type;
+		if (node->prop_length)
 			read_props();
 	}
 
@@ -274,17 +313,17 @@ static void handle_node(void)
 		/* For the fast_export_* functions, NULL means empty. */
 		old_data = NULL;
 	if (!have_text) {
-		fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
+		fast_export_modify(node->dst.buf, node->type, old_data);
 		return;
 	}
-	if (!node_ctx.text_delta) {
-		fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
-		fast_export_data(node_ctx.type, node_ctx.text_length, &input);
+	if (!node->text_delta) {
+		fast_export_modify(node->dst.buf, node->type, "inline");
+		fast_export_data(node->type, node->text_length, &input);
 		return;
 	}
-	fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
-	fast_export_blob_delta(node_ctx.type, old_mode, old_data,
-				node_ctx.text_length, &input);
+	fast_export_modify(node->dst.buf, node->type, "inline");
+	fast_export_blob_delta(node->type, old_mode, old_data,
+				node->text_length, &input);
 }
 
 static void begin_revision(const char *remote_ref)
@@ -316,7 +355,7 @@ void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
 	uint32_t active_ctx = DUMP_CTX;
 	uint32_t len;
 
-	reset_dump_ctx(url);
+	reset_dump_ctx(&dump_ctx, url);
 	while ((t = buffer_read_line(&input))) {
 		val = strchr(t, ':');
 		if (!val)
@@ -346,13 +385,13 @@ void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
 			if (constcmp(t, "Revision-number"))
 				continue;
 			if (active_ctx == NODE_CTX)
-				handle_node();
+				handle_node(node_ctx);
 			if (active_ctx == REV_CTX)
 				begin_revision(local_ref);
 			if (active_ctx != DUMP_CTX)
 				end_revision(notes_ref);
 			active_ctx = REV_CTX;
-			reset_rev_ctx(atoi(val));
+			reset_rev_ctx(&rev_ctx, atoi(val));
 			strbuf_addf(&rev_ctx.note, "%s\n", t);
 			break;
 		case sizeof("Node-path"):
@@ -360,11 +399,11 @@ void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
 				continue;
 			if (!constcmp(t + strlen("Node-"), "path")) {
 				if (active_ctx == NODE_CTX)
-					handle_node();
+					handle_node(node_ctx);
 				if (active_ctx == REV_CTX)
 					begin_revision(local_ref);
 				active_ctx = NODE_CTX;
-				reset_node_ctx(val);
+				node_ctx = new_node_ctx(val);
 				strbuf_addf(&rev_ctx.note, "%s\n", t);
 				break;
 			}
@@ -372,9 +411,9 @@ void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
 				continue;
 			strbuf_addf(&rev_ctx.note, "%s\n", t);
 			if (!strcmp(val, "dir"))
-				node_ctx.type = REPO_MODE_DIR;
+				node_ctx->type = REPO_MODE_DIR;
 			else if (!strcmp(val, "file"))
-				node_ctx.type = REPO_MODE_BLB;
+				node_ctx->type = REPO_MODE_BLB;
 			else
 				fprintf(stderr, "Unknown node-kind: %s\n", val);
 			break;
@@ -383,29 +422,29 @@ void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
 				continue;
 			strbuf_addf(&rev_ctx.note, "%s\n", t);
 			if (!strcmp(val, "delete")) {
-				node_ctx.action = NODEACT_DELETE;
+				node_ctx->action = NODEACT_DELETE;
 			} else if (!strcmp(val, "add")) {
-				node_ctx.action = NODEACT_ADD;
+				node_ctx->action = NODEACT_ADD;
 			} else if (!strcmp(val, "change")) {
-				node_ctx.action = NODEACT_CHANGE;
+				node_ctx->action = NODEACT_CHANGE;
 			} else if (!strcmp(val, "replace")) {
-				node_ctx.action = NODEACT_REPLACE;
+				node_ctx->action = NODEACT_REPLACE;
 			} else {
 				fprintf(stderr, "Unknown node-action: %s\n", val);
-				node_ctx.action = NODEACT_UNKNOWN;
+				node_ctx->action = NODEACT_UNKNOWN;
 			}
 			break;
 		case sizeof("Node-copyfrom-path"):
 			if (constcmp(t, "Node-copyfrom-path"))
 				continue;
-			strbuf_reset(&node_ctx.src);
-			strbuf_addstr(&node_ctx.src, val);
+			strbuf_reset(&node_ctx->src);
+			strbuf_addstr(&node_ctx->src, val);
 			strbuf_addf(&rev_ctx.note, "%s\n", t);
 			break;
 		case sizeof("Node-copyfrom-rev"):
 			if (constcmp(t, "Node-copyfrom-rev"))
 				continue;
-			node_ctx.srcRev = atoi(val);
+			node_ctx->srcRev = atoi(val);
 			strbuf_addf(&rev_ctx.note, "%s\n", t);
 			break;
 		case sizeof("Text-content-length"):
@@ -424,19 +463,19 @@ void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
 					die("unrepresentable length in dump: %s", val);
 
 				if (*t == 'T')
-					node_ctx.text_length = (off_t) len;
+					node_ctx->text_length = (off_t) len;
 				else
-					node_ctx.prop_length = (off_t) len;
+					node_ctx->prop_length = (off_t) len;
 				break;
 			}
 		case sizeof("Text-delta"):
 			if (!constcmp(t, "Text-delta")) {
-				node_ctx.text_delta = !strcmp(val, "true");
+				node_ctx->text_delta = !strcmp(val, "true");
 				break;
 			}
 			if (constcmp(t, "Prop-delta"))
 				continue;
-			node_ctx.prop_delta = !strcmp(val, "true");
+			node_ctx->prop_delta = !strcmp(val, "true");
 			break;
 		case sizeof("Content-length"):
 			if (constcmp(t, "Content-length"))
@@ -450,7 +489,7 @@ void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
 			if (active_ctx == REV_CTX) {
 				read_props();
 			} else if (active_ctx == NODE_CTX) {
-				handle_node();
+				handle_node(node_ctx);
 				active_ctx = INTERNODE_CTX;
 			} else {
 				fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
@@ -462,7 +501,7 @@ void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
 	if (buffer_ferror(&input))
 		die_short_read();
 	if (active_ctx == NODE_CTX)
-		handle_node();
+		handle_node(node_ctx);
 	if (active_ctx == REV_CTX)
 		begin_revision(local_ref);
 	if (active_ctx != DUMP_CTX)
@@ -477,11 +516,10 @@ static void init(int report_fd)
 	strbuf_init(&rev_ctx.log, 4096);
 	strbuf_init(&rev_ctx.author, 4096);
 	strbuf_init(&rev_ctx.note, 4096);
-	strbuf_init(&node_ctx.src, 4096);
-	strbuf_init(&node_ctx.dst, 4096);
-	reset_dump_ctx(NULL);
-	reset_rev_ctx(0);
-	reset_node_ctx(NULL);
+	reset_dump_ctx(&dump_ctx, NULL);
+	reset_rev_ctx(&rev_ctx, 0);
+	node_ctx = new_node_ctx(NULL);
+	node_list = node_list_tail = NULL;
 	return;
 }
 
@@ -504,14 +542,12 @@ int svndump_init_fd(int in_fd, int back_fd)
 void svndump_deinit(void)
 {
 	fast_export_deinit();
-	reset_dump_ctx(NULL);
-	reset_rev_ctx(0);
-	reset_node_ctx(NULL);
+	reset_dump_ctx(&dump_ctx, NULL);
+	reset_rev_ctx(&rev_ctx, 0);
 	strbuf_release(&rev_ctx.log);
 	strbuf_release(&rev_ctx.author);
 	strbuf_release(&rev_ctx.note);
-	strbuf_release(&node_ctx.src);
-	strbuf_release(&node_ctx.dst);
+	free_node_list();
 	if (buffer_deinit(&input))
 		fprintf(stderr, "Input error\n");
 	if (ferror(stdout))
@@ -524,4 +560,5 @@ void svndump_reset(void)
 	strbuf_release(&dump_ctx.url);
 	strbuf_release(&rev_ctx.log);
 	strbuf_release(&rev_ctx.author);
+	free_node_list();
 }
diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h
index d545453..29794df 100644
--- a/vcs-svn/svndump.h
+++ b/vcs-svn/svndump.h
@@ -13,6 +13,8 @@ struct node_ctx_t {
 	off_t prop_length, text_length;
 	struct strbuf src, dst;
 	uint32_t text_delta, prop_delta;
+	const char *dataref;
+	struct node_ctx_t *next;
 };
 
 struct rev_ctx_t {
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RFC 3/4] vcs-svn/svndump: rewrite handle_node(), begin|end_revision()
  2012-08-20 21:57   ` [RFC 2/4] vcs-svn/svndump: restructure node_ctx, rev_ctx handling Florian Achleitner
@ 2012-08-20 21:57     ` Florian Achleitner
  2012-08-20 21:57       ` [RFC 4/4] vcs-svn: remove repo_tree Florian Achleitner
  0 siblings, 1 reply; 5+ messages in thread
From: Florian Achleitner @ 2012-08-20 21:57 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, David Michael Barr, Jonathan Nieder,
	Florian Achleitner

Separate deciding what to do from actually doing it in
handle_node(), to allow for detection of branches from svn nodes.
Split the function into handle_node() and apply_node().

svn dumps are structured in revisions, which contain multiple nodes.
Nodes represent operations on data. Currently the function
handle_node() strongly mixes the interpretation of the node data with
the output of processed data to fast-import.

In a fast-import stream, a commit command must name, at its very
beginning, the branch to which the new commit is added.

We want to detect branches in svn. This can only be done by analyzing
node operations, like copyfrom. This conflicts with the current
implementation, where at the beginning of each new revision in the svn
dump, a new commit on a hard-coded git branch is created, before even
reading the first node.

To allow analyzing the nodes before deciding on which branch the
commit will be placed, store the node metadata of one complete
revision, and create a commit from it, when it ends.

Each node can have file data appended. It's desirable to not store the
actual file data, as it is unbounded.  fast-import has a 'blob'
command that allows writing blobs, independent of commits. Use this
feature instead of sending data inline and send the actual file data
immediately when it is read in.

Use marks to reference a blob later. fast-import's marks are currently
used for marking commits, where the mark number corresponds to exactly
one svn revision.
Store the marks for blobs in the upper half of the marks number space
where the MSB is 1.

Change handle_node() to interpret the node data, store it in a
node_ctx, send blobs to fast-import, and append the new node_ctx to
the list of node_ctx.  Do this until the end of a revision.

Just clear the list of node_ctx in begin_revision().

At end_revision() all node metadata is available in the node_ctx list.
Future branch detectors can then decide which branches are to be changed.
Then, call apply_node() for each of them to actually create a commit
and change/add/delete files according to the node_ctx using the
already added blobs.

This can also be used to create commits if the node metadata does not
come from a svndump, but is stored in e.g. notes, for later branch
detection.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 vcs-svn/svndump.c |  167 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 109 insertions(+), 58 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 385523a..28ce2aa 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -48,7 +48,6 @@ static struct node_ctx_t *node_list, *node_list_tail;
 static struct node_ctx_t *new_node_ctx(char *fname)
 {
 	struct node_ctx_t *node = xmalloc(sizeof(struct node_ctx_t));
-	trace_printf("new_node_ctx %p\n", node);
 	node->type = 0;
 	node->action = NODEACT_UNKNOWN;
 	node->prop_length = -1;
@@ -67,7 +66,6 @@ static struct node_ctx_t *new_node_ctx(char *fname)
 
 static void free_node_ctx(struct node_ctx_t *node)
 {
-	trace_printf("free_node_ctx %p\n", node);
 	strbuf_release(&node->src);
 	strbuf_release(&node->dst);
 	free((char*)node->dataref);
@@ -77,7 +75,6 @@ static void free_node_ctx(struct node_ctx_t *node)
 static void free_node_list(void)
 {
 	struct node_ctx_t *p = node_list, *n;
-	trace_printf("free_node_list head %p tail %p\n", node_list, node_list_tail);
 	while (p) {
 		n = p->next;
 		free_node_ctx(p);
@@ -88,7 +85,6 @@ static void free_node_list(void)
 
 static void append_node_list(struct node_ctx_t *n)
 {
-	trace_printf("append_node_list %p head %p tail %p\n", n, node_list, node_list_tail);
 	if (!node_list)
 		node_list = node_list_tail = n;
 	else {
@@ -246,23 +242,10 @@ static void handle_node(struct node_ctx_t *node)
 	static const char *const empty_blob = "::empty::";
 	const char *old_data = NULL;
 	uint32_t old_mode = REPO_MODE_BLB;
+	struct strbuf sb = STRBUF_INIT;
+	static uintmax_t blobmark = 1UL << (bitsizeof(uintmax_t) - 1);
+
 
-	if (node->action == NODEACT_DELETE) {
-		if (have_text || have_props || node->srcRev)
-			die("invalid dump: deletion node has "
-				"copyfrom info, text, or properties");
-		repo_delete(node->dst.buf);
-		return;
-	}
-	if (node->action == NODEACT_REPLACE) {
-		repo_delete(node->dst.buf);
-		node->action = NODEACT_ADD;
-	}
-	if (node->srcRev) {
-		repo_copy(node->srcRev, node->src.buf, node->dst.buf);
-		if (node->action == NODEACT_ADD)
-			node->action = NODEACT_CHANGE;
-	}
 	if (have_text && type == REPO_MODE_DIR)
 		die("invalid dump: directories cannot have text attached");
 
@@ -270,28 +253,61 @@ static void handle_node(struct node_ctx_t *node)
 	 * Find old content (old_data) and decide on the new mode.
 	 */
 	if (node->action == NODEACT_CHANGE && !*node->dst.buf) {
+		/*
+		 * changes the root of the tree (empty dst), e.g. adding properties.
+		 * see 9e8c5321
+		 */
 		if (type != REPO_MODE_DIR)
-			die("invalid dump: root of tree is not a regular file");
+			die("invalid dump: root of tree is not a directory");
 		old_data = NULL;
 	} else if (node->action == NODEACT_CHANGE) {
 		uint32_t mode;
-		old_data = repo_read_path(node->dst.buf, &mode); /* malloced buffer */
+		if (fast_export_ls_rev(rev_ctx.revision - 1, node->dst.buf, &mode, &sb)) {
+			if (errno != ENOENT)
+				die_errno("BUG: unexpected fast_export_ls error");
+			/* Treat missing paths as directories. */
+			mode = REPO_MODE_DIR;
+			old_data = NULL;
+		} else
+			old_data = strbuf_detach(&sb, NULL);
 		if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
-			die("invalid dump: cannot modify a directory into a file");
+			die("invalid dump: cannot modify a directory into a file: "
+					"%s. old_data %s", node->dst.buf, old_data);
 		if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
-			die("invalid dump: cannot modify a file into a directory");
+			die("invalid dump: cannot modify a file into a directory: %s",
+					node->dst.buf);
 		node->type = mode;
 		old_mode = mode;
-	} else if (node->action == NODEACT_ADD) {
-		if (type == REPO_MODE_DIR)
+	} else if (node->action == NODEACT_ADD || node->action == NODEACT_REPLACE) {
+		if (node->srcRev) {	/* was copied */
+			/* read dataref and mode from src blob */
+			strbuf_reset(&sb);
+			if (fast_export_ls_rev(node->srcRev, node->src.buf, &node->type, &sb)) {
+				if (errno != ENOENT)
+					die_errno("BUG: unexpected fast_export_ls_rev error");
+				/*
+				 * if the dataref is not available, it may be a copy of an empty
+				 * dir. We delete the target, and write it when the first file
+				 * is added.
+				 */
+				node->action = NODEACT_DELETE;
+				node->dataref = NULL;
+			} else
+				node->dataref = strbuf_detach(&sb, NULL);
+			old_data = NULL;
+		} else if (type == REPO_MODE_DIR)
 			old_data = NULL;
 		else if (have_text)
 			old_data = empty_blob;
 		else
 			die("invalid dump: adds node without text");
-	} else {
+	} else if (node->action == NODEACT_DELETE) {
+		old_data = empty_blob;
+		if (have_text || have_props || node->srcRev)
+			die("invalid dump: deletion node has "
+				"copyfrom info, text, or properties");
+	} else
 		die("invalid dump: Node-path block lacks Node-action");
-	}
 
 	/*
 	 * Adjust mode to reflect properties.
@@ -304,48 +320,83 @@ static void handle_node(struct node_ctx_t *node)
 	}
 
 	/*
-	 * Save the result.
+	 * Send the data and save the node_ctx.
 	 */
-	if (type == REPO_MODE_DIR)	/* directories are not tracked. */
-		return;
-	assert(old_data);
-	if (old_data == empty_blob)
-		/* For the fast_export_* functions, NULL means empty. */
-		old_data = NULL;
-	if (!have_text) {
-		fast_export_modify(node->dst.buf, node->type, old_data);
-		return;
+	if (type != REPO_MODE_DIR) {	/* directories are not tracked. */
+		assert(old_data);
+		if (old_data == empty_blob)
+			/* For the fast_export_* functions, NULL means empty. */
+			old_data = NULL;
+		if (!have_text)
+			node->dataref = old_data;
+		else {
+			if (!node->text_delta) {
+				printf("blob\n"
+						"mark :%"PRIuMAX"\n", ++blobmark);
+				fast_export_data(node->type, node->text_length, &input);
+			}
+			else {
+				printf("blob\n"
+						"mark :%"PRIuMAX"\n", ++blobmark);
+				fast_export_blob_delta(node->type, old_mode, old_data,
+						node->text_length, &input);
+			}
+
+			strbuf_addf(&sb, ":%lu", blobmark);
+			node->dataref = sb.buf;
+		}
 	}
-	if (!node->text_delta) {
-		fast_export_modify(node->dst.buf, node->type, "inline");
-		fast_export_data(node->type, node->text_length, &input);
+	append_node_list(node);
+}
+
+static void apply_node(struct node_ctx_t *node)
+{
+	if (node->action == NODEACT_DELETE) {
+		fast_export_delete(node->dst.buf);
 		return;
 	}
-	fast_export_modify(node->dst.buf, node->type, "inline");
-	fast_export_blob_delta(node->type, old_mode, old_data,
-				node->text_length, &input);
+	if (node->action == NODEACT_REPLACE)
+		fast_export_delete(node->dst.buf);
+	/*
+	 * apply the previously sent node-data to a commit
+	 */
+	if (node->dataref)
+		fast_export_modify(node->dst.buf, node->type, node->dataref);
+}
+
+
+static void apply_node_list(void)
+{
+	struct node_ctx_t *p = node_list, *n;
+	while (p) {
+		n = p->next;
+		apply_node(p);
+		p = n;
+	}
+}
+
+static void begin_revision(const char *remote_ref_)
+{
+	current_ref = remote_ref_;
+	free_node_list();
 }
 
-static void begin_revision(const char *remote_ref)
+static void end_revision(const char *note_ref)
 {
+	struct strbuf mark = STRBUF_INIT;
 	if (!rev_ctx.revision)	/* revision 0 gets no git commit. */
 		return;
 	fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf,
 		&rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf,
-		rev_ctx.timestamp, remote_ref);
-}
+		rev_ctx.timestamp, current_ref);
+	apply_node_list();
+	fast_export_end_commit(rev_ctx.revision);
 
-static void end_revision(const char *note_ref)
-{
-	struct strbuf mark = STRBUF_INIT;
-	if (rev_ctx.revision) {
-		fast_export_end_commit(rev_ctx.revision);
-		fast_export_begin_note(rev_ctx.revision, "remote-svn",
-				"Note created by remote-svn.", rev_ctx.timestamp, note_ref);
-		strbuf_addf(&mark, ":%"PRIu32, rev_ctx.revision);
-		fast_export_note(mark.buf, "inline");
-		fast_export_buf_to_data(&rev_ctx.note);
-	}
+	fast_export_begin_note(rev_ctx.revision, "remote-svn",
+			"Note created by remote-svn.", rev_ctx.timestamp, note_ref);
+	strbuf_addf(&mark, ":%"PRIu32, rev_ctx.revision);
+	fast_export_note(mark.buf, "inline");
+	fast_export_buf_to_data(&rev_ctx.note);
 }
 
 void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RFC 4/4] vcs-svn: remove repo_tree
  2012-08-20 21:57     ` [RFC 3/4] vcs-svn/svndump: rewrite handle_node(), begin|end_revision() Florian Achleitner
@ 2012-08-20 21:57       ` Florian Achleitner
  0 siblings, 0 replies; 5+ messages in thread
From: Florian Achleitner @ 2012-08-20 21:57 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, David Michael Barr, Jonathan Nieder,
	Florian Achleitner

The rewritten svndump.c leaves only very little functionality in
repo_tree.c, which can easily be inlined. Let's remove it.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile            |    1 -
 vcs-svn/repo_tree.c |   48 ------------------------------------------------
 vcs-svn/repo_tree.h |   14 --------------
 3 files changed, 63 deletions(-)
 delete mode 100644 vcs-svn/repo_tree.c

diff --git a/Makefile b/Makefile
index fb5cdcf..4e14903 100644
--- a/Makefile
+++ b/Makefile
@@ -2188,7 +2188,6 @@ XDIFF_OBJS += xdiff/xhistogram.o
 
 VCSSVN_OBJS += vcs-svn/line_buffer.o
 VCSSVN_OBJS += vcs-svn/sliding_window.o
-VCSSVN_OBJS += vcs-svn/repo_tree.o
 VCSSVN_OBJS += vcs-svn/fast_export.o
 VCSSVN_OBJS += vcs-svn/svndiff.o
 VCSSVN_OBJS += vcs-svn/svndump.o
diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c
deleted file mode 100644
index 67d27f0..0000000
--- a/vcs-svn/repo_tree.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed under a two-clause BSD-style license.
- * See LICENSE for details.
- */
-
-#include "git-compat-util.h"
-#include "strbuf.h"
-#include "repo_tree.h"
-#include "fast_export.h"
-
-const char *repo_read_path(const char *path, uint32_t *mode_out)
-{
-	int err;
-	static struct strbuf buf = STRBUF_INIT;
-
-	strbuf_reset(&buf);
-	err = fast_export_ls(path, mode_out, &buf);
-	if (err) {
-		if (errno != ENOENT)
-			die_errno("BUG: unexpected fast_export_ls error");
-		/* Treat missing paths as directories. */
-		*mode_out = REPO_MODE_DIR;
-		return NULL;
-	}
-	return buf.buf;
-}
-
-void repo_copy(uint32_t revision, const char *src, const char *dst)
-{
-	int err;
-	uint32_t mode;
-	static struct strbuf data = STRBUF_INIT;
-
-	strbuf_reset(&data);
-	err = fast_export_ls_rev(revision, src, &mode, &data);
-	if (err) {
-		if (errno != ENOENT)
-			die_errno("BUG: unexpected fast_export_ls_rev error");
-		fast_export_delete(dst);
-		return;
-	}
-	fast_export_modify(dst, mode, data.buf);
-}
-
-void repo_delete(const char *path)
-{
-	fast_export_delete(path);
-}
diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h
index 889c6a3..6d0f51e 100644
--- a/vcs-svn/repo_tree.h
+++ b/vcs-svn/repo_tree.h
@@ -1,23 +1,9 @@
 #ifndef REPO_TREE_H_
 #define REPO_TREE_H_
 
-struct strbuf;
-
 #define REPO_MODE_DIR 0040000
 #define REPO_MODE_BLB 0100644
 #define REPO_MODE_EXE 0100755
 #define REPO_MODE_LNK 0120000
 
-uint32_t next_blob_mark(void);
-void repo_copy(uint32_t revision, const char *src, const char *dst);
-void repo_add(const char *path, uint32_t mode, uint32_t blob_mark);
-const char *repo_read_path(const char *path, uint32_t *mode_out);
-void repo_delete(const char *path);
-void repo_commit(uint32_t revision, const char *author,
-		const struct strbuf *log, const char *uuid, const char *url,
-		long unsigned timestamp);
-void repo_diff(uint32_t r1, uint32_t r2);
-void repo_init(void);
-void repo_reset(void);
-
 #endif
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-08-20 21:58 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-08-20 21:57 [RFC 0/4] GSOC: prepare svndump for branch detection Florian Achleitner
2012-08-20 21:57 ` [RFC 1/4] svndump: move struct definitions to .h Florian Achleitner
2012-08-20 21:57   ` [RFC 2/4] vcs-svn/svndump: restructure node_ctx, rev_ctx handling Florian Achleitner
2012-08-20 21:57     ` [RFC 3/4] vcs-svn/svndump: rewrite handle_node(), begin|end_revision() Florian Achleitner
2012-08-20 21:57       ` [RFC 4/4] vcs-svn: remove repo_tree Florian Achleitner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.