tree: Fix name lookups once and for all Double-pass binary search. Jeez.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
diff --git a/include/git2/tree.h b/include/git2/tree.h
index e305950..8d638f7 100644
--- a/include/git2/tree.h
+++ b/include/git2/tree.h
@@ -88,9 +88,6 @@ GIT_EXTERN(unsigned int) git_tree_entrycount(git_tree *tree);
/**
* Lookup a tree entry by its filename
*
- * Note that if the entry in the tree is a folder instead of
- * a standard file, the given name must be ended with a slash.
- *
* @param tree a previously loaded tree.
* @param filename the filename of the desired entry
* @return the tree entry; NULL if not found
@@ -206,9 +203,6 @@ GIT_EXTERN(void) git_treebuilder_free(git_treebuilder *bld);
/**
* Get an entry from the builder from its filename
*
- * Note that if the entry in the tree is a folder instead of
- * a standard file, the given name must be ended with a slash.
- *
* The returned entry is owned by the builder and should
* not be freed manually.
*
diff --git a/src/tree.c b/src/tree.c
index 27c018f..00aefc2 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -20,53 +20,104 @@ static int valid_attributes(const int attributes)
return attributes >= 0 && attributes <= MAX_FILEMODE;
}
+static int valid_entry_name(const char *filename)
+{
+ return strlen(filename) > 0 && strchr(filename, '/') == NULL;
+}
+
+static int entry_sort_cmp(const void *a, const void *b)
+{
+ const git_tree_entry *entry_a = (const git_tree_entry *)(a);
+ const git_tree_entry *entry_b = (const git_tree_entry *)(b);
+
+ return git_futils_cmp_path(
+ entry_a->filename, entry_a->filename_len, entry_a->attr & 040000,
+ entry_b->filename, entry_b->filename_len, entry_b->attr & 040000);
+}
+
+
struct tree_key_search {
const char *filename;
size_t filename_len;
- int is_folder;
};
-static int entry_search_cmp(const void *key, const void *array_member)
+static int homing_search_cmp(const void *key, const void *array_member)
{
const struct tree_key_search *ksearch = key;
const git_tree_entry *entry = array_member;
- int result =
- git_futils_cmp_path(
- ksearch->filename, ksearch->filename_len, ksearch->is_folder,
- entry->filename, entry->filename_len, entry->attr & 040000);
+ const size_t len1 = ksearch->filename_len;
+ const size_t len2 = entry->filename_len;
- return result ? result : ((int)ksearch->filename_len - (int)entry->filename_len);
+ return memcmp(
+ ksearch->filename,
+ entry->filename,
+ len1 < len2 ? len1 : len2
+ );
}
-static int entry_sort_cmp(const void *a, const void *b)
+/*
+ * Search for an entry in a given tree.
+ *
+ * Note that this search is performed in two steps because
+ * of the way tree entries are sorted internally in git:
+ *
+ * Entries in a tree are not sorted alphabetically; two entries
+ * with the same root prefix will have different positions
+ * depending on whether they are folders (subtrees) or normal files.
+ *
+ * Consequently, it is not possible to find an entry on the tree
+ * with a binary search if you don't know whether the filename
+ * you're looking for is a folder or a normal file.
+ *
+ * To work around this, we first perform a homing binary search
+ * on the tree, using the minimal length root prefix of our filename.
+ * Once the comparisons for this homing search start becoming
+ * ambiguous because of folder vs file sorting, we look linearly
+ * around the area for our target file.
+ */
+static int tree_key_search(git_vector *entries, const char *filename)
{
- const git_tree_entry *entry_a = (const git_tree_entry *)(a);
- const git_tree_entry *entry_b = (const git_tree_entry *)(b);
+ struct tree_key_search ksearch;
+ const git_tree_entry *entry;
- return git_futils_cmp_path(
- entry_a->filename, entry_a->filename_len, entry_a->attr & 040000,
- entry_b->filename, entry_b->filename_len, entry_b->attr & 040000);
-}
+ int homing, i;
-static int build_ksearch(struct tree_key_search *ksearch, const char *path)
-{
- size_t len = strlen(path);
- int is_folder = 0;
+ ksearch.filename = filename;
+ ksearch.filename_len = strlen(filename);
- if (len && path[len - 1] == '/') {
- is_folder = 1;
- len--;
+ /* Initial homing search; find an entry on the tree with
+ * the same prefix as the filename we're looking for */
+ homing = git_vector_bsearch2(entries, &homing_search_cmp, &ksearch);
+ if (homing < 0)
+ return homing;
+
+ /* We found a common prefix. Look forward as long as
+ * there are entries that share the common prefix */
+ for (i = homing; i < (int)entries->length; ++i) {
+ entry = entries->contents[i];
+
+ if (homing_search_cmp(&ksearch, entry) != 0)
+ break;
+
+ if (strcmp(filename, entry->filename) == 0)
+ return i;
}
- if (len == 0 || memchr(path, '/', len) != NULL)
- return GIT_ERROR;
+ /* If we haven't found our filename yet, look backwards
+ * too as long as we have entries with the same prefix */
+ for (i = homing - 1; i >= 0; --i) {
+ entry = entries->contents[i];
- ksearch->filename = path;
- ksearch->filename_len = len;
- ksearch->is_folder = is_folder;
+ if (homing_search_cmp(&ksearch, entry) != 0)
+ break;
- return GIT_SUCCESS;
+ if (strcmp(filename, entry->filename) == 0)
+ return i;
+ }
+
+ /* The filename doesn't exist at all */
+ return GIT_ENOTFOUND;
}
void git_tree__free(git_tree *tree)
@@ -128,14 +179,10 @@ int git_tree_entry_2object(git_object **object_out, git_repository *repo, const
const git_tree_entry *git_tree_entry_byname(git_tree *tree, const char *filename)
{
int idx;
- struct tree_key_search ksearch;
assert(tree && filename);
- if (build_ksearch(&ksearch, filename) < GIT_SUCCESS)
- return NULL;
-
- idx = git_vector_bsearch2(&tree->entries, entry_search_cmp, &ksearch);
+ idx = tree_key_search(&tree->entries, filename);
if (idx == GIT_ENOTFOUND)
return NULL;
@@ -415,21 +462,18 @@ int git_treebuilder_insert(git_tree_entry **entry_out, git_treebuilder *bld, con
{
git_tree_entry *entry;
int pos;
- struct tree_key_search ksearch;
assert(bld && id && filename);
if (!valid_attributes(attributes))
return git__throw(GIT_ERROR, "Failed to insert entry. Invalid attributes");
- if (build_ksearch(&ksearch, filename) < GIT_SUCCESS)
- return git__throw(GIT_ERROR, "Failed to insert entry. Invalid filename '%s'", filename);
+ if (!valid_entry_name(filename))
+ return git__throw(GIT_ERROR, "Failed to insert entry. Invalid name for a tree entry");
- if ((attributes & 040000) != 0) {
- ksearch.is_folder = 1;
- }
+ pos = tree_key_search(&bld->entries, filename);
- if ((pos = git_vector_bsearch2(&bld->entries, entry_search_cmp, &ksearch)) != GIT_ENOTFOUND) {
+ if (pos >= 0) {
entry = git_vector_get(&bld->entries, pos);
if (entry->removed) {
entry->removed = 0;
@@ -464,15 +508,11 @@ static git_tree_entry *treebuilder_get(git_treebuilder *bld, const char *filenam
{
int idx;
git_tree_entry *entry;
- struct tree_key_search ksearch;
assert(bld && filename);
- if (build_ksearch(&ksearch, filename) < GIT_SUCCESS)
- return NULL;
-
- idx = git_vector_bsearch2(&bld->entries, entry_search_cmp, &ksearch);
- if (idx == GIT_ENOTFOUND)
+ idx = tree_key_search(&bld->entries, filename);
+ if (idx < 0)
return NULL;
entry = git_vector_get(&bld->entries, idx);