Commit 337b2b08f46ea77d61fa66657ad62d8702bc233a

Edward Thomson 2015-11-30T20:53:54

Merge pull request #3508 from libgit2/cmn/tree-parse-speed Improvements to tree parsing speed

diff --git a/src/tree.c b/src/tree.c
index bdd1766..0a32868 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -81,14 +81,26 @@ int git_tree_entry_icmp(const git_tree_entry *e1, const git_tree_entry *e2)
 		git__strncasecmp);
 }
 
-static git_tree_entry *alloc_entry(const char *filename)
+/**
+ * Allocate either from the pool or from the system allocator
+ */
+static git_tree_entry *alloc_entry_base(git_pool *pool, const char *filename, size_t filename_len)
 {
 	git_tree_entry *entry = NULL;
-	size_t filename_len = strlen(filename), tree_len;
+	size_t tree_len;
+
+	if (filename_len > UINT16_MAX) {
+		giterr_set(GITERR_INVALID, "tree entry is over UINT16_MAX in length");
+		return NULL;
+	}
 
 	if (GIT_ADD_SIZET_OVERFLOW(&tree_len, sizeof(git_tree_entry), filename_len) ||
-		GIT_ADD_SIZET_OVERFLOW(&tree_len, tree_len, 1) ||
-		!(entry = git__malloc(tree_len)))
+	    GIT_ADD_SIZET_OVERFLOW(&tree_len, tree_len, 1))
+		return NULL;
+
+	entry = pool ? git_pool_malloc(pool, tree_len) :
+		       git__malloc(tree_len);
+	if (!entry)
 		return NULL;
 
 	memset(entry, 0x0, sizeof(git_tree_entry));
@@ -99,9 +111,31 @@ static git_tree_entry *alloc_entry(const char *filename)
 	return entry;
 }
 
+/**
+ * Allocate a tree entry, using the poolin the tree which owns
+ * it. This is useful when reading trees, so we don't allocate a ton
+ * of small strings but can use the pool.
+ */
+static git_tree_entry *alloc_entry_pooled(git_pool *pool, const char *filename, size_t filename_len)
+{
+	git_tree_entry *entry = NULL;
+
+	if (!(entry = alloc_entry_base(pool, filename, filename_len)))
+		return NULL;
+
+	entry->pooled = true;
+
+	return entry;
+}
+
+static git_tree_entry *alloc_entry(const char *filename)
+{
+	return alloc_entry_base(NULL, filename, strlen(filename));
+}
+
 struct tree_key_search {
 	const char *filename;
-	size_t filename_len;
+	uint16_t filename_len;
 };
 
 static int homing_search_cmp(const void *key, const void *array_member)
@@ -198,7 +232,7 @@ static int tree_key_search(
 
 void git_tree_entry_free(git_tree_entry *entry)
 {
-	if (entry == NULL)
+	if (entry == NULL || entry->pooled)
 		return;
 
 	git__free(entry);
@@ -233,6 +267,7 @@ void git_tree__free(void *_tree)
 		git_tree_entry_free(e);
 
 	git_vector_free(&tree->entries);
+	git_pool_clear(&tree->pool);
 	git__free(tree);
 }
 
@@ -385,11 +420,14 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj)
 	const char *buffer = git_odb_object_data(odb_obj);
 	const char *buffer_end = buffer + git_odb_object_size(odb_obj);
 
+	git_pool_init(&tree->pool, 1);
 	if (git_vector_init(&tree->entries, DEFAULT_TREE_SIZE, entry_sort_cmp) < 0)
 		return -1;
 
 	while (buffer < buffer_end) {
 		git_tree_entry *entry;
+		size_t filename_len;
+		const char *nul;
 		int attr;
 
 		if (git__strtol32(&attr, buffer, &buffer, 8) < 0 || !buffer)
@@ -398,26 +436,23 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj)
 		if (*buffer++ != ' ')
 			return tree_error("Failed to parse tree. Object is corrupted", NULL);
 
-		if (memchr(buffer, 0, buffer_end - buffer) == NULL)
+		if ((nul = memchr(buffer, 0, buffer_end - buffer)) == NULL)
 			return tree_error("Failed to parse tree. Object is corrupted", NULL);
 
+		filename_len = nul - buffer;
 		/** Allocate the entry and store it in the entries vector */
 		{
-			entry = alloc_entry(buffer);
+			entry = alloc_entry_pooled(&tree->pool, buffer, filename_len);
 			GITERR_CHECK_ALLOC(entry);
 
-			if (git_vector_insert(&tree->entries, entry) < 0) {
-				git__free(entry);
+			if (git_vector_insert(&tree->entries, entry) < 0)
 				return -1;
-			}
 
 			entry->attr = attr;
 		}
 
-		while (buffer < buffer_end && *buffer != 0)
-			buffer++;
-
-		buffer++;
+		/* Advance to the ID just after the path */
+		buffer += filename_len + 1;
 
 		git_oid_fromraw(&entry->oid, (const unsigned char *)buffer);
 		buffer += GIT_OID_RAWSZ;
diff --git a/src/tree.h b/src/tree.h
index d01b6fd..914d788 100644
--- a/src/tree.h
+++ b/src/tree.h
@@ -12,17 +12,20 @@
 #include "odb.h"
 #include "vector.h"
 #include "strmap.h"
+#include "pool.h"
 
 struct git_tree_entry {
 	uint16_t attr;
+	uint16_t filename_len;
 	git_oid oid;
-	size_t filename_len;
-	char filename[1];
+	bool pooled;
+	char filename[GIT_FLEX_ARRAY];
 };
 
 struct git_tree {
 	git_object object;
 	git_vector entries;
+	git_pool pool;
 };
 
 struct git_treebuilder {