Commit 28c1451a7c863cb89d598c03d53c4e73497e4c83

Vicent Marti 2011-10-20T02:35:19

tree: Fix name lookups once and for all Double-pass binary search. Jeez.

diff --git a/include/git2/tree.h b/include/git2/tree.h
index e305950..8d638f7 100644
--- a/include/git2/tree.h
+++ b/include/git2/tree.h
@@ -88,9 +88,6 @@ GIT_EXTERN(unsigned int) git_tree_entrycount(git_tree *tree);
 /**
  * Lookup a tree entry by its filename
  *
- * Note that if the entry in the tree is a folder instead of
- * a standard file, the given name must be ended with a slash.
- *
  * @param tree a previously loaded tree.
  * @param filename the filename of the desired entry
  * @return the tree entry; NULL if not found
@@ -206,9 +203,6 @@ GIT_EXTERN(void) git_treebuilder_free(git_treebuilder *bld);
 /**
  * Get an entry from the builder from its filename
  *
- * Note that if the entry in the tree is a folder instead of
- * a standard file, the given name must be ended with a slash.
- *
  * The returned entry is owned by the builder and should
  * not be freed manually.
  *
diff --git a/src/tree.c b/src/tree.c
index 27c018f..00aefc2 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -20,53 +20,104 @@ static int valid_attributes(const int attributes)
 	return attributes >= 0 && attributes <= MAX_FILEMODE;
 }
 
+static int valid_entry_name(const char *filename)
+{
+	return strlen(filename) > 0 && strchr(filename, '/') == NULL;
+}
+
+static int entry_sort_cmp(const void *a, const void *b)
+{
+	const git_tree_entry *entry_a = (const git_tree_entry *)(a);
+	const git_tree_entry *entry_b = (const git_tree_entry *)(b);
+
+	return git_futils_cmp_path(
+		entry_a->filename, entry_a->filename_len, entry_a->attr & 040000,
+		entry_b->filename, entry_b->filename_len, entry_b->attr & 040000);
+}
+
+
 struct tree_key_search {
 	const char *filename;
 	size_t filename_len;
-	int is_folder;
 };
 
-static int entry_search_cmp(const void *key, const void *array_member)
+static int homing_search_cmp(const void *key, const void *array_member)
 {
 	const struct tree_key_search *ksearch = key;
 	const git_tree_entry *entry = array_member;
 
-	int result =
-		git_futils_cmp_path(
-			ksearch->filename, ksearch->filename_len, ksearch->is_folder,
-			entry->filename, entry->filename_len, entry->attr & 040000);
+	const size_t len1 = ksearch->filename_len;
+	const size_t len2 = entry->filename_len;
 
-	return result ? result : ((int)ksearch->filename_len - (int)entry->filename_len);
+	return memcmp(
+		ksearch->filename,
+		entry->filename,
+		len1 < len2 ? len1 : len2
+	);
 }
 
-static int entry_sort_cmp(const void *a, const void *b)
+/*
+ * Search for an entry in a given tree.
+ *
+ * Note that this search is performed in two steps because
+ * of the way tree entries are sorted internally in git:
+ *
+ * Entries in a tree are not sorted alphabetically; two entries
+ * with the same root prefix will have different positions
+ * depending on whether they are folders (subtrees) or normal files.
+ *
+ * Consequently, it is not possible to find an entry on the tree
+ * with a binary search if you don't know whether the filename
+ * you're looking for is a folder or a normal file.
+ *
+ * To work around this, we first perform a homing binary search
+ * on the tree, using the minimal length root prefix of our filename.
+ * Once the comparisons for this homing search start becoming
+ * ambiguous because of folder vs file sorting, we look linearly
+ * around the area for our target file.
+ */
+static int tree_key_search(git_vector *entries, const char *filename)
 {
-	const git_tree_entry *entry_a = (const git_tree_entry *)(a);
-	const git_tree_entry *entry_b = (const git_tree_entry *)(b);
+	struct tree_key_search ksearch;
+	const git_tree_entry *entry;
 
-	return git_futils_cmp_path(
-		entry_a->filename, entry_a->filename_len, entry_a->attr & 040000,
-		entry_b->filename, entry_b->filename_len, entry_b->attr & 040000);
-}
+	int homing, i;
 
-static int build_ksearch(struct tree_key_search *ksearch, const char *path)
-{
-	size_t len = strlen(path);
-	int is_folder = 0;
+	ksearch.filename = filename;
+	ksearch.filename_len = strlen(filename);
 
-	if (len && path[len - 1] == '/') {
-		is_folder = 1;
-		len--;
+	/* Initial homing search; find an entry on the tree with
+	 * the same prefix as the filename we're looking for */
+	homing = git_vector_bsearch2(entries, &homing_search_cmp, &ksearch);
+	if (homing < 0)
+		return homing;
+
+	/* We found a common prefix. Look forward as long as
+	 * there are entries that share the common prefix */
+	for (i = homing; i < (int)entries->length; ++i) {
+		entry = entries->contents[i];
+
+		if (homing_search_cmp(&ksearch, entry) != 0)
+			break;
+
+		if (strcmp(filename, entry->filename) == 0)
+			return i;
 	}
 
-	if (len == 0 || memchr(path, '/', len) != NULL)
-		return GIT_ERROR;
+	/* If we haven't found our filename yet, look backwards
+	 * too as long as we have entries with the same prefix */
+	for (i = homing - 1; i >= 0; --i) {
+		entry = entries->contents[i];
 
-	ksearch->filename = path;
-	ksearch->filename_len = len;
-	ksearch->is_folder = is_folder;
+		if (homing_search_cmp(&ksearch, entry) != 0)
+			break;
 
-	return GIT_SUCCESS;
+		if (strcmp(filename, entry->filename) == 0)
+			return i;
+	}
+
+	/* The filename doesn't exist at all */
+	return GIT_ENOTFOUND;
 }
 
 void git_tree__free(git_tree *tree)
@@ -128,14 +179,10 @@ int git_tree_entry_2object(git_object **object_out, git_repository *repo, const 
 const git_tree_entry *git_tree_entry_byname(git_tree *tree, const char *filename)
 {
 	int idx;
-	struct tree_key_search ksearch;
 
 	assert(tree && filename);
 
-	if (build_ksearch(&ksearch, filename) < GIT_SUCCESS)
-		return NULL;
-
-	idx = git_vector_bsearch2(&tree->entries, entry_search_cmp, &ksearch);
+	idx = tree_key_search(&tree->entries, filename);
 	if (idx == GIT_ENOTFOUND)
 		return NULL;
 
@@ -415,21 +462,18 @@ int git_treebuilder_insert(git_tree_entry **entry_out, git_treebuilder *bld, con
 {
 	git_tree_entry *entry;
 	int pos;
-	struct tree_key_search ksearch;
 
 	assert(bld && id && filename);
 
 	if (!valid_attributes(attributes))
 		return git__throw(GIT_ERROR, "Failed to insert entry. Invalid attributes");
 
-	if (build_ksearch(&ksearch, filename) < GIT_SUCCESS)
-		return git__throw(GIT_ERROR, "Failed to insert entry. Invalid filename '%s'", filename);
+	if (!valid_entry_name(filename))
+		return git__throw(GIT_ERROR, "Failed to insert entry. Invalid name for a tree entry");
 
-	if ((attributes & 040000) != 0) {
-		ksearch.is_folder = 1;
-	}
+	pos = tree_key_search(&bld->entries, filename);
 
-	if ((pos = git_vector_bsearch2(&bld->entries, entry_search_cmp, &ksearch)) != GIT_ENOTFOUND) {
+	if (pos >= 0) {
 		entry = git_vector_get(&bld->entries, pos);
 		if (entry->removed) {
 			entry->removed = 0;
@@ -464,15 +508,11 @@ static git_tree_entry *treebuilder_get(git_treebuilder *bld, const char *filenam
 {
 	int idx;
 	git_tree_entry *entry;
-	struct tree_key_search ksearch;
 
 	assert(bld && filename);
 
-	if (build_ksearch(&ksearch, filename) < GIT_SUCCESS)
-		return NULL;
-
-	idx = git_vector_bsearch2(&bld->entries, entry_search_cmp, &ksearch);
-	if (idx == GIT_ENOTFOUND)
+	idx = tree_key_search(&bld->entries, filename);
+	if (idx < 0)
 		return NULL;
 
 	entry = git_vector_get(&bld->entries, idx);