Merge pull request #3508 from libgit2/cmn/tree-parse-speed Improvements to tree parsing speed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
diff --git a/src/tree.c b/src/tree.c
index bdd1766..0a32868 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -81,14 +81,26 @@ int git_tree_entry_icmp(const git_tree_entry *e1, const git_tree_entry *e2)
git__strncasecmp);
}
-static git_tree_entry *alloc_entry(const char *filename)
+/**
+ * Allocate either from the pool or from the system allocator
+ */
+static git_tree_entry *alloc_entry_base(git_pool *pool, const char *filename, size_t filename_len)
{
git_tree_entry *entry = NULL;
- size_t filename_len = strlen(filename), tree_len;
+ size_t tree_len;
+
+ if (filename_len > UINT16_MAX) {
+ giterr_set(GITERR_INVALID, "tree entry is over UINT16_MAX in length");
+ return NULL;
+ }
if (GIT_ADD_SIZET_OVERFLOW(&tree_len, sizeof(git_tree_entry), filename_len) ||
- GIT_ADD_SIZET_OVERFLOW(&tree_len, tree_len, 1) ||
- !(entry = git__malloc(tree_len)))
+ GIT_ADD_SIZET_OVERFLOW(&tree_len, tree_len, 1))
+ return NULL;
+
+ entry = pool ? git_pool_malloc(pool, tree_len) :
+ git__malloc(tree_len);
+ if (!entry)
return NULL;
memset(entry, 0x0, sizeof(git_tree_entry));
@@ -99,9 +111,31 @@ static git_tree_entry *alloc_entry(const char *filename)
return entry;
}
+/**
+ * Allocate a tree entry, using the poolin the tree which owns
+ * it. This is useful when reading trees, so we don't allocate a ton
+ * of small strings but can use the pool.
+ */
+static git_tree_entry *alloc_entry_pooled(git_pool *pool, const char *filename, size_t filename_len)
+{
+ git_tree_entry *entry = NULL;
+
+ if (!(entry = alloc_entry_base(pool, filename, filename_len)))
+ return NULL;
+
+ entry->pooled = true;
+
+ return entry;
+}
+
+static git_tree_entry *alloc_entry(const char *filename)
+{
+ return alloc_entry_base(NULL, filename, strlen(filename));
+}
+
struct tree_key_search {
const char *filename;
- size_t filename_len;
+ uint16_t filename_len;
};
static int homing_search_cmp(const void *key, const void *array_member)
@@ -198,7 +232,7 @@ static int tree_key_search(
void git_tree_entry_free(git_tree_entry *entry)
{
- if (entry == NULL)
+ if (entry == NULL || entry->pooled)
return;
git__free(entry);
@@ -233,6 +267,7 @@ void git_tree__free(void *_tree)
git_tree_entry_free(e);
git_vector_free(&tree->entries);
+ git_pool_clear(&tree->pool);
git__free(tree);
}
@@ -385,11 +420,14 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj)
const char *buffer = git_odb_object_data(odb_obj);
const char *buffer_end = buffer + git_odb_object_size(odb_obj);
+ git_pool_init(&tree->pool, 1);
if (git_vector_init(&tree->entries, DEFAULT_TREE_SIZE, entry_sort_cmp) < 0)
return -1;
while (buffer < buffer_end) {
git_tree_entry *entry;
+ size_t filename_len;
+ const char *nul;
int attr;
if (git__strtol32(&attr, buffer, &buffer, 8) < 0 || !buffer)
@@ -398,26 +436,23 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj)
if (*buffer++ != ' ')
return tree_error("Failed to parse tree. Object is corrupted", NULL);
- if (memchr(buffer, 0, buffer_end - buffer) == NULL)
+ if ((nul = memchr(buffer, 0, buffer_end - buffer)) == NULL)
return tree_error("Failed to parse tree. Object is corrupted", NULL);
+ filename_len = nul - buffer;
/** Allocate the entry and store it in the entries vector */
{
- entry = alloc_entry(buffer);
+ entry = alloc_entry_pooled(&tree->pool, buffer, filename_len);
GITERR_CHECK_ALLOC(entry);
- if (git_vector_insert(&tree->entries, entry) < 0) {
- git__free(entry);
+ if (git_vector_insert(&tree->entries, entry) < 0)
return -1;
- }
entry->attr = attr;
}
- while (buffer < buffer_end && *buffer != 0)
- buffer++;
-
- buffer++;
+ /* Advance to the ID just after the path */
+ buffer += filename_len + 1;
git_oid_fromraw(&entry->oid, (const unsigned char *)buffer);
buffer += GIT_OID_RAWSZ;
diff --git a/src/tree.h b/src/tree.h
index d01b6fd..914d788 100644
--- a/src/tree.h
+++ b/src/tree.h
@@ -12,17 +12,20 @@
#include "odb.h"
#include "vector.h"
#include "strmap.h"
+#include "pool.h"
struct git_tree_entry {
uint16_t attr;
+ uint16_t filename_len;
git_oid oid;
- size_t filename_len;
- char filename[1];
+ bool pooled;
+ char filename[GIT_FLEX_ARRAY];
};
struct git_tree {
git_object object;
git_vector entries;
+ git_pool pool;
};
struct git_treebuilder {