treebuilder: use a map instead of vector to store the entries Finding a filename in a vector means we need to resort it every time we want to read from it, which includes every time we want to write to it as well, as we want to find duplicate keys. A hash-map fits what we want to do much more accurately, as we do not care about sorting, but just the particular filename. We still keep removed entries around, as the interface let you assume they were going to be around until the treebuilder is cleared or freed, but in this case that involves an append to a vector in the filter case, which can now fail. The only time we care about sorting is when we write out the tree, so let's make that the only time we do any sorting.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
diff --git a/include/git2/tree.h b/include/git2/tree.h
index 56922d4..8f1d8a0 100644
--- a/include/git2/tree.h
+++ b/include/git2/tree.h
@@ -354,7 +354,7 @@ typedef int (*git_treebuilder_filter_cb)(
* @param filter Callback to filter entries
* @param payload Extra data to pass to filter callback
*/
-GIT_EXTERN(void) git_treebuilder_filter(
+GIT_EXTERN(int) git_treebuilder_filter(
git_treebuilder *bld,
git_treebuilder_filter_cb filter,
void *payload);
diff --git a/src/tree.c b/src/tree.c
index b64efe4..7d28c86 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -17,6 +17,8 @@
#define DEFAULT_TREE_SIZE 16
#define MAX_FILEMODE_BYTES 6
+GIT__USE_STRMAP;
+
static bool valid_filemode(const int filemode)
{
return (filemode == GIT_FILEMODE_TREE
@@ -450,6 +452,7 @@ static int append_entry(
git_filemode_t filemode)
{
git_tree_entry *entry;
+ int error = 0;
if (!valid_entry_name(filename))
return tree_error("Failed to insert entry. Invalid name for a tree entry", filename);
@@ -460,8 +463,9 @@ static int append_entry(
git_oid_cpy(&entry->oid, id);
entry->attr = (uint16_t)filemode;
- if (git_vector_insert_sorted(&bld->entries, entry, NULL) < 0) {
- git__free(entry);
+ git_strmap_insert(bld->map, entry->filename, entry, error);
+ if (error < 0) {
+ giterr_set(GITERR_TREE, "failed to append entry %s to the tree builder", filename);
return -1;
}
@@ -610,17 +614,18 @@ int git_tree__write_index(
int git_treebuilder_create(git_treebuilder **builder_p, const git_tree *source)
{
git_treebuilder *bld;
- size_t i, source_entries = DEFAULT_TREE_SIZE;
+ size_t i;
assert(builder_p);
bld = git__calloc(1, sizeof(git_treebuilder));
GITERR_CHECK_ALLOC(bld);
- if (source != NULL)
- source_entries = source->entries.length;
+ if (git_strmap_alloc(&bld->map) < 0) {
+ return -1;
+ }
- if (git_vector_init(&bld->entries, source_entries, entry_sort_cmp) < 0)
+ if (git_vector_init(&bld->removed, 0, NULL) < 0)
goto on_error;
if (source != NULL) {
@@ -651,7 +656,8 @@ int git_treebuilder_insert(
git_filemode_t filemode)
{
git_tree_entry *entry;
- size_t pos;
+ int error;
+ git_strmap_iter iter;
assert(bld && id && filename);
@@ -661,24 +667,25 @@ int git_treebuilder_insert(
if (!valid_entry_name(filename))
return tree_error("Failed to insert entry. Invalid name for a tree entry", filename);
- if (!tree_key_search(&pos, &bld->entries, filename, strlen(filename))) {
- entry = git_vector_get(&bld->entries, pos);
- if (entry->removed) {
- entry->removed = 0;
- bld->entrycount++;
- }
- } else {
- entry = alloc_entry(filename);
- GITERR_CHECK_ALLOC(entry);
+ entry = alloc_entry(filename);
+ GITERR_CHECK_ALLOC(entry);
- if (git_vector_insert_sorted(&bld->entries, entry, NULL) < 0) {
- git__free(entry);
+ iter = git_strmap_lookup_index(bld->map, entry->filename);
+ if (git_strmap_valid_index(bld->map, iter)) {
+ git_tree_entry *old_val = git_strmap_value_at(bld->map, iter);
+ if (git_vector_insert(&bld->removed, old_val) < 0)
return -1;
- }
- bld->entrycount++;
+ git_strmap_delete_at(bld->map, iter);
}
+ git_strmap_insert(bld->map, entry->filename, entry, error);
+ if (error < 0) {
+ giterr_set(GITERR_TREE, "failed to insert %s", filename);
+ return -1;
+ }
+
+ bld->entrycount++;
git_oid_cpy(&entry->oid, id);
entry->attr = filemode;
@@ -690,17 +697,14 @@ int git_treebuilder_insert(
static git_tree_entry *treebuilder_get(git_treebuilder *bld, const char *filename)
{
- size_t idx;
- git_tree_entry *entry;
+ git_tree_entry *entry = NULL;
+ git_strmap_iter pos;
assert(bld && filename);
- if (tree_key_search(&idx, &bld->entries, filename, strlen(filename)) < 0)
- return NULL;
-
- entry = git_vector_get(&bld->entries, idx);
- if (entry->removed)
- return NULL;
+ pos = git_strmap_lookup_index(bld->map, filename);
+ if (git_strmap_valid_index(bld->map, pos))
+ entry = git_strmap_value_at(bld->map, pos);
return entry;
}
@@ -712,12 +716,16 @@ const git_tree_entry *git_treebuilder_get(git_treebuilder *bld, const char *file
int git_treebuilder_remove(git_treebuilder *bld, const char *filename)
{
- git_tree_entry *remove_ptr = treebuilder_get(bld, filename);
+ git_tree_entry *entry = treebuilder_get(bld, filename);
- if (remove_ptr == NULL || remove_ptr->removed)
+ if (entry == NULL)
return tree_error("Failed to remove entry. File isn't in the tree", filename);
- remove_ptr->removed = 1;
+ if (git_vector_insert(&bld->removed, entry) < 0)
+ return -1;
+
+ git_strmap_delete(bld->map, filename);
+
bld->entrycount--;
return 0;
}
@@ -728,19 +736,26 @@ int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *b
size_t i;
git_buf tree = GIT_BUF_INIT;
git_odb *odb;
+ git_tree_entry *entry;
+ git_vector entries;
assert(bld);
- git_vector_sort(&bld->entries);
+ if (git_vector_init(&entries, bld->entrycount, entry_sort_cmp) < 0)
+ return -1;
- /* Grow the buffer beforehand to an estimated size */
- error = git_buf_grow(&tree, bld->entries.length * 72);
+ git_strmap_foreach_value(bld->map, entry, {
+ if (git_vector_insert(&entries, entry) < 0)
+ return -1;
+ });
- for (i = 0; i < bld->entries.length && !error; ++i) {
- git_tree_entry *entry = git_vector_get(&bld->entries, i);
+ git_vector_sort(&entries);
+
+ /* Grow the buffer beforehand to an estimated size */
+ error = git_buf_grow(&tree, bld->entrycount * 72);
- if (entry->removed)
- continue;
+ for (i = 0; i < entries.length && !error; ++i) {
+ git_tree_entry *entry = git_vector_get(&entries, i);
git_buf_printf(&tree, "%o ", entry->attr);
git_buf_put(&tree, entry->filename, entry->filename_len + 1);
@@ -750,6 +765,8 @@ int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *b
error = -1;
}
+ git_vector_free(&entries);
+
if (!error &&
!(error = git_repository_odb__weakptr(&odb, repo)))
error = git_odb_write(oid, odb, tree.ptr, tree.size, GIT_OBJ_TREE);
@@ -758,22 +775,27 @@ int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *b
return error;
}
-void git_treebuilder_filter(
+int git_treebuilder_filter(
git_treebuilder *bld,
git_treebuilder_filter_cb filter,
void *payload)
{
- size_t i;
+ const char *filename;
git_tree_entry *entry;
assert(bld && filter);
- git_vector_foreach(&bld->entries, i, entry) {
- if (!entry->removed && filter(entry, payload)) {
- entry->removed = 1;
- bld->entrycount--;
- }
- }
+ git_strmap_foreach(bld->map, filename, entry, {
+ if (filter(entry, payload)) {
+ if (git_vector_insert(&bld->removed, entry) < 0)
+ return -1;
+
+ git_strmap_delete(bld->map, filename);
+ bld->entrycount--;
+ }
+ });
+
+ return 0;
}
void git_treebuilder_clear(git_treebuilder *bld)
@@ -783,10 +805,12 @@ void git_treebuilder_clear(git_treebuilder *bld)
assert(bld);
- git_vector_foreach(&bld->entries, i, e)
+ git_vector_foreach(&bld->removed, i, e)
git_tree_entry_free(e);
- git_vector_clear(&bld->entries);
+ git_strmap_foreach_value(bld->map, e, git_tree_entry_free(e));
+
+ git_vector_clear(&bld->removed);
bld->entrycount = 0;
}
@@ -796,7 +820,8 @@ void git_treebuilder_free(git_treebuilder *bld)
return;
git_treebuilder_clear(bld);
- git_vector_free(&bld->entries);
+ git_vector_free(&bld->removed);
+ git_strmap_free(bld->map);
git__free(bld);
}
diff --git a/src/tree.h b/src/tree.h
index f07039a..38daf21 100644
--- a/src/tree.h
+++ b/src/tree.h
@@ -11,9 +11,9 @@
#include "repository.h"
#include "odb.h"
#include "vector.h"
+#include "strmap.h"
struct git_tree_entry {
- uint16_t removed;
uint16_t attr;
git_oid oid;
size_t filename_len;
@@ -26,8 +26,9 @@ struct git_tree {
};
struct git_treebuilder {
- git_vector entries;
+ git_vector removed;
size_t entrycount; /* vector may contain "removed" entries */
+ git_strmap *map;
};
GIT_INLINE(bool) git_tree_entry__is_tree(const struct git_tree_entry *e)
diff --git a/tests/object/tree/write.c b/tests/object/tree/write.c
index 45356e8..ddb62e2 100644
--- a/tests/object/tree/write.c
+++ b/tests/object/tree/write.c
@@ -104,6 +104,7 @@ void test_object_tree_write__subtree(void)
void test_object_tree_write__sorted_subtrees(void)
{
git_treebuilder *builder;
+ git_tree *tree;
unsigned int i;
int position_c = -1, position_cake = -1, position_config = -1;
@@ -143,8 +144,9 @@ void test_object_tree_write__sorted_subtrees(void)
cl_git_pass(git_treebuilder_write(&tree_oid, g_repo, builder));
- for (i = 0; i < builder->entries.length; ++i) {
- git_tree_entry *entry = git_vector_get(&builder->entries, i);
+ cl_git_pass(git_tree_lookup(&tree, g_repo, &tree_oid));
+ for (i = 0; i < git_tree_entrycount(tree); i++) {
+ const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
if (strcmp(entry->filename, "c") == 0)
position_c = i;
@@ -156,6 +158,8 @@ void test_object_tree_write__sorted_subtrees(void)
position_config = i;
}
+ git_tree_free(tree);
+
cl_assert(position_c != -1);
cl_assert(position_cake != -1);
cl_assert(position_config != -1);